1 // 2 // Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // number of relocations needed by a call trampoline stub 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(C2_MacroAssembler *masm); 1191 static int emit_deopt_handler(C2_MacroAssembler* masm); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // exception handler starts out as jump and can be patched to 1196 // a call be deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5 byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // exception handler starts out as jump and can be patched to 1211 // a call be deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch(bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1250 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1251 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1252 } 1253 1254 class Node::PD { 1255 public: 1256 enum NodeFlags { 1257 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1258 Flag_sets_carry_flag = Node::_last_flag << 2, 1259 Flag_sets_parity_flag = Node::_last_flag << 3, 1260 Flag_sets_zero_flag = Node::_last_flag << 4, 1261 Flag_sets_overflow_flag = Node::_last_flag << 5, 1262 Flag_sets_sign_flag = Node::_last_flag << 6, 1263 Flag_clears_carry_flag = Node::_last_flag << 7, 1264 Flag_clears_parity_flag = Node::_last_flag << 8, 1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_UMinV: 1769 case Op_UMaxV: 1770 if (UseAVX == 0) { 1771 return false; 1772 } 1773 break; 1774 case Op_MaxV: 1775 case Op_MinV: 1776 if (UseSSE < 4 && is_integral_type(bt)) { 1777 return false; 1778 } 1779 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1780 // Float/Double intrinsics are enabled for AVX family currently. 1781 if (UseAVX == 0) { 1782 return false; 1783 } 1784 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1785 return false; 1786 } 1787 } 1788 break; 1789 case Op_CallLeafVector: 1790 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1791 return false; 1792 } 1793 break; 1794 case Op_AddReductionVI: 1795 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1796 return false; 1797 } 1798 // fallthrough 1799 case Op_AndReductionV: 1800 case Op_OrReductionV: 1801 case Op_XorReductionV: 1802 if (is_subword_type(bt) && (UseSSE < 4)) { 1803 return false; 1804 } 1805 #ifndef _LP64 1806 if (bt == T_BYTE || bt == T_LONG) { 1807 return false; 1808 } 1809 #endif 1810 break; 1811 #ifndef _LP64 1812 case Op_VectorInsert: 1813 if (bt == T_LONG || bt == T_DOUBLE) { 1814 return false; 1815 } 1816 break; 1817 #endif 1818 case Op_MinReductionV: 1819 case Op_MaxReductionV: 1820 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1821 return false; 1822 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1823 return false; 1824 } 1825 // Float/Double intrinsics enabled for AVX family. 1826 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1827 return false; 1828 } 1829 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1830 return false; 1831 } 1832 #ifndef _LP64 1833 if (bt == T_BYTE || bt == T_LONG) { 1834 return false; 1835 } 1836 #endif 1837 break; 1838 case Op_VectorTest: 1839 if (UseSSE < 4) { 1840 return false; // Implementation limitation 1841 } else if (size_in_bits < 32) { 1842 return false; // Implementation limitation 1843 } 1844 break; 1845 case Op_VectorLoadShuffle: 1846 case Op_VectorRearrange: 1847 if(vlen == 2) { 1848 return false; // Implementation limitation due to how shuffle is loaded 1849 } else if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 break; 1853 case Op_VectorLoadMask: 1854 case Op_VectorMaskCast: 1855 if (size_in_bits == 256 && UseAVX < 2) { 1856 return false; // Implementation limitation 1857 } 1858 // fallthrough 1859 case Op_VectorStoreMask: 1860 if (vlen == 2) { 1861 return false; // Implementation limitation 1862 } 1863 break; 1864 case Op_PopulateIndex: 1865 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1866 return false; 1867 } 1868 break; 1869 case Op_VectorCastB2X: 1870 case Op_VectorCastS2X: 1871 case Op_VectorCastI2X: 1872 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1873 return false; 1874 } 1875 break; 1876 case Op_VectorCastL2X: 1877 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1878 return false; 1879 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1880 return false; 1881 } 1882 break; 1883 case Op_VectorCastF2X: { 1884 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1885 // happen after intermediate conversion to integer and special handling 1886 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 1887 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1888 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1889 return false; 1890 } 1891 } 1892 // fallthrough 1893 case Op_VectorCastD2X: 1894 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1895 return false; 1896 } 1897 break; 1898 case Op_VectorCastF2HF: 1899 case Op_VectorCastHF2F: 1900 if (!VM_Version::supports_f16c() && 1901 ((!VM_Version::supports_evex() || 1902 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1903 return false; 1904 } 1905 break; 1906 case Op_RoundVD: 1907 if (!VM_Version::supports_avx512dq()) { 1908 return false; 1909 } 1910 break; 1911 case Op_MulReductionVI: 1912 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1913 return false; 1914 } 1915 break; 1916 case Op_LoadVectorGatherMasked: 1917 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1918 return false; 1919 } 1920 if (is_subword_type(bt) && 1921 (!is_LP64 || 1922 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1923 (size_in_bits < 64) || 1924 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1925 return false; 1926 } 1927 break; 1928 case Op_StoreVectorScatterMasked: 1929 case Op_StoreVectorScatter: 1930 if (is_subword_type(bt)) { 1931 return false; 1932 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1933 return false; 1934 } 1935 // fallthrough 1936 case Op_LoadVectorGather: 1937 if (!is_subword_type(bt) && size_in_bits == 64) { 1938 return false; 1939 } 1940 if (is_subword_type(bt) && size_in_bits < 64) { 1941 return false; 1942 } 1943 break; 1944 case Op_SaturatingAddV: 1945 case Op_SaturatingSubV: 1946 if (UseAVX < 1) { 1947 return false; // Implementation limitation 1948 } 1949 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1950 return false; 1951 } 1952 break; 1953 case Op_SelectFromTwoVector: 1954 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1955 return false; 1956 } 1957 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1958 return false; 1959 } 1960 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1961 return false; 1962 } 1963 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1964 return false; 1965 } 1966 break; 1967 case Op_MaskAll: 1968 if (!VM_Version::supports_evex()) { 1969 return false; 1970 } 1971 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1972 return false; 1973 } 1974 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1975 return false; 1976 } 1977 break; 1978 case Op_VectorMaskCmp: 1979 if (vlen < 2 || size_in_bits < 32) { 1980 return false; 1981 } 1982 break; 1983 case Op_CompressM: 1984 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1985 return false; 1986 } 1987 break; 1988 case Op_CompressV: 1989 case Op_ExpandV: 1990 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1991 return false; 1992 } 1993 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1994 return false; 1995 } 1996 if (size_in_bits < 128 ) { 1997 return false; 1998 } 1999 case Op_VectorLongToMask: 2000 if (UseAVX < 1 || !is_LP64) { 2001 return false; 2002 } 2003 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 2004 return false; 2005 } 2006 break; 2007 case Op_SignumVD: 2008 case Op_SignumVF: 2009 if (UseAVX < 1) { 2010 return false; 2011 } 2012 break; 2013 case Op_PopCountVI: 2014 case Op_PopCountVL: { 2015 if (!is_pop_count_instr_target(bt) && 2016 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2017 return false; 2018 } 2019 } 2020 break; 2021 case Op_ReverseV: 2022 case Op_ReverseBytesV: 2023 if (UseAVX < 2) { 2024 return false; 2025 } 2026 break; 2027 case Op_CountTrailingZerosV: 2028 case Op_CountLeadingZerosV: 2029 if (UseAVX < 2) { 2030 return false; 2031 } 2032 break; 2033 } 2034 return true; // Per default match rules are supported. 2035 } 2036 2037 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2038 // ADLC based match_rule_supported routine checks for the existence of pattern based 2039 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2040 // of their non-masked counterpart with mask edge being the differentiator. 2041 // This routine does a strict check on the existence of masked operation patterns 2042 // by returning a default false value for all the other opcodes apart from the 2043 // ones whose masked instruction patterns are defined in this file. 2044 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2045 return false; 2046 } 2047 2048 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2049 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2050 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2051 return false; 2052 } 2053 switch(opcode) { 2054 // Unary masked operations 2055 case Op_AbsVB: 2056 case Op_AbsVS: 2057 if(!VM_Version::supports_avx512bw()) { 2058 return false; // Implementation limitation 2059 } 2060 case Op_AbsVI: 2061 case Op_AbsVL: 2062 return true; 2063 2064 // Ternary masked operations 2065 case Op_FmaVF: 2066 case Op_FmaVD: 2067 return true; 2068 2069 case Op_MacroLogicV: 2070 if(bt != T_INT && bt != T_LONG) { 2071 return false; 2072 } 2073 return true; 2074 2075 // Binary masked operations 2076 case Op_AddVB: 2077 case Op_AddVS: 2078 case Op_SubVB: 2079 case Op_SubVS: 2080 case Op_MulVS: 2081 case Op_LShiftVS: 2082 case Op_RShiftVS: 2083 case Op_URShiftVS: 2084 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2085 if (!VM_Version::supports_avx512bw()) { 2086 return false; // Implementation limitation 2087 } 2088 return true; 2089 2090 case Op_MulVL: 2091 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2092 if (!VM_Version::supports_avx512dq()) { 2093 return false; // Implementation limitation 2094 } 2095 return true; 2096 2097 case Op_AndV: 2098 case Op_OrV: 2099 case Op_XorV: 2100 case Op_RotateRightV: 2101 case Op_RotateLeftV: 2102 if (bt != T_INT && bt != T_LONG) { 2103 return false; // Implementation limitation 2104 } 2105 return true; 2106 2107 case Op_VectorLoadMask: 2108 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2109 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2110 return false; 2111 } 2112 return true; 2113 2114 case Op_AddVI: 2115 case Op_AddVL: 2116 case Op_AddVF: 2117 case Op_AddVD: 2118 case Op_SubVI: 2119 case Op_SubVL: 2120 case Op_SubVF: 2121 case Op_SubVD: 2122 case Op_MulVI: 2123 case Op_MulVF: 2124 case Op_MulVD: 2125 case Op_DivVF: 2126 case Op_DivVD: 2127 case Op_SqrtVF: 2128 case Op_SqrtVD: 2129 case Op_LShiftVI: 2130 case Op_LShiftVL: 2131 case Op_RShiftVI: 2132 case Op_RShiftVL: 2133 case Op_URShiftVI: 2134 case Op_URShiftVL: 2135 case Op_LoadVectorMasked: 2136 case Op_StoreVectorMasked: 2137 case Op_LoadVectorGatherMasked: 2138 case Op_StoreVectorScatterMasked: 2139 return true; 2140 2141 case Op_UMinV: 2142 case Op_UMaxV: 2143 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2144 return false; 2145 } // fallthrough 2146 case Op_MaxV: 2147 case Op_MinV: 2148 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2149 return false; // Implementation limitation 2150 } 2151 if (is_floating_point_type(bt)) { 2152 return false; // Implementation limitation 2153 } 2154 return true; 2155 case Op_SaturatingAddV: 2156 case Op_SaturatingSubV: 2157 if (!is_subword_type(bt)) { 2158 return false; 2159 } 2160 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2161 return false; // Implementation limitation 2162 } 2163 return true; 2164 2165 case Op_VectorMaskCmp: 2166 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2167 return false; // Implementation limitation 2168 } 2169 return true; 2170 2171 case Op_VectorRearrange: 2172 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2173 return false; // Implementation limitation 2174 } 2175 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2176 return false; // Implementation limitation 2177 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2178 return false; // Implementation limitation 2179 } 2180 return true; 2181 2182 // Binary Logical operations 2183 case Op_AndVMask: 2184 case Op_OrVMask: 2185 case Op_XorVMask: 2186 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2187 return false; // Implementation limitation 2188 } 2189 return true; 2190 2191 case Op_PopCountVI: 2192 case Op_PopCountVL: 2193 if (!is_pop_count_instr_target(bt)) { 2194 return false; 2195 } 2196 return true; 2197 2198 case Op_MaskAll: 2199 return true; 2200 2201 case Op_CountLeadingZerosV: 2202 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2203 return true; 2204 } 2205 default: 2206 return false; 2207 } 2208 } 2209 2210 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2211 return false; 2212 } 2213 2214 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2215 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2216 bool legacy = (generic_opnd->opcode() == LEGVEC); 2217 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2218 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2219 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2220 return new legVecZOper(); 2221 } 2222 if (legacy) { 2223 switch (ideal_reg) { 2224 case Op_VecS: return new legVecSOper(); 2225 case Op_VecD: return new legVecDOper(); 2226 case Op_VecX: return new legVecXOper(); 2227 case Op_VecY: return new legVecYOper(); 2228 case Op_VecZ: return new legVecZOper(); 2229 } 2230 } else { 2231 switch (ideal_reg) { 2232 case Op_VecS: return new vecSOper(); 2233 case Op_VecD: return new vecDOper(); 2234 case Op_VecX: return new vecXOper(); 2235 case Op_VecY: return new vecYOper(); 2236 case Op_VecZ: return new vecZOper(); 2237 } 2238 } 2239 ShouldNotReachHere(); 2240 return nullptr; 2241 } 2242 2243 bool Matcher::is_reg2reg_move(MachNode* m) { 2244 switch (m->rule()) { 2245 case MoveVec2Leg_rule: 2246 case MoveLeg2Vec_rule: 2247 case MoveF2VL_rule: 2248 case MoveF2LEG_rule: 2249 case MoveVL2F_rule: 2250 case MoveLEG2F_rule: 2251 case MoveD2VL_rule: 2252 case MoveD2LEG_rule: 2253 case MoveVL2D_rule: 2254 case MoveLEG2D_rule: 2255 return true; 2256 default: 2257 return false; 2258 } 2259 } 2260 2261 bool Matcher::is_generic_vector(MachOper* opnd) { 2262 switch (opnd->opcode()) { 2263 case VEC: 2264 case LEGVEC: 2265 return true; 2266 default: 2267 return false; 2268 } 2269 } 2270 2271 //------------------------------------------------------------------------ 2272 2273 const RegMask* Matcher::predicate_reg_mask(void) { 2274 return &_VECTMASK_REG_mask; 2275 } 2276 2277 // Max vector size in bytes. 0 if not supported. 2278 int Matcher::vector_width_in_bytes(BasicType bt) { 2279 assert(is_java_primitive(bt), "only primitive type vectors"); 2280 if (UseSSE < 2) return 0; 2281 // SSE2 supports 128bit vectors for all types. 2282 // AVX2 supports 256bit vectors for all types. 2283 // AVX2/EVEX supports 512bit vectors for all types. 2284 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2285 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2286 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2287 size = (UseAVX > 2) ? 64 : 32; 2288 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2289 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2290 // Use flag to limit vector size. 2291 size = MIN2(size,(int)MaxVectorSize); 2292 // Minimum 2 values in vector (or 4 for bytes). 2293 switch (bt) { 2294 case T_DOUBLE: 2295 case T_LONG: 2296 if (size < 16) return 0; 2297 break; 2298 case T_FLOAT: 2299 case T_INT: 2300 if (size < 8) return 0; 2301 break; 2302 case T_BOOLEAN: 2303 if (size < 4) return 0; 2304 break; 2305 case T_CHAR: 2306 if (size < 4) return 0; 2307 break; 2308 case T_BYTE: 2309 if (size < 4) return 0; 2310 break; 2311 case T_SHORT: 2312 if (size < 4) return 0; 2313 break; 2314 default: 2315 ShouldNotReachHere(); 2316 } 2317 return size; 2318 } 2319 2320 // Limits on vector size (number of elements) loaded into vector. 2321 int Matcher::max_vector_size(const BasicType bt) { 2322 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2323 } 2324 int Matcher::min_vector_size(const BasicType bt) { 2325 int max_size = max_vector_size(bt); 2326 // Min size which can be loaded into vector is 4 bytes. 2327 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2328 // Support for calling svml double64 vectors 2329 if (bt == T_DOUBLE) { 2330 size = 1; 2331 } 2332 return MIN2(size,max_size); 2333 } 2334 2335 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2336 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2337 // by default on Cascade Lake 2338 if (VM_Version::is_default_intel_cascade_lake()) { 2339 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2340 } 2341 return Matcher::max_vector_size(bt); 2342 } 2343 2344 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2345 return -1; 2346 } 2347 2348 // Vector ideal reg corresponding to specified size in bytes 2349 uint Matcher::vector_ideal_reg(int size) { 2350 assert(MaxVectorSize >= size, ""); 2351 switch(size) { 2352 case 4: return Op_VecS; 2353 case 8: return Op_VecD; 2354 case 16: return Op_VecX; 2355 case 32: return Op_VecY; 2356 case 64: return Op_VecZ; 2357 } 2358 ShouldNotReachHere(); 2359 return 0; 2360 } 2361 2362 // Check for shift by small constant as well 2363 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2364 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2365 shift->in(2)->get_int() <= 3 && 2366 // Are there other uses besides address expressions? 2367 !matcher->is_visited(shift)) { 2368 address_visited.set(shift->_idx); // Flag as address_visited 2369 mstack.push(shift->in(2), Matcher::Visit); 2370 Node *conv = shift->in(1); 2371 #ifdef _LP64 2372 // Allow Matcher to match the rule which bypass 2373 // ConvI2L operation for an array index on LP64 2374 // if the index value is positive. 2375 if (conv->Opcode() == Op_ConvI2L && 2376 conv->as_Type()->type()->is_long()->_lo >= 0 && 2377 // Are there other uses besides address expressions? 2378 !matcher->is_visited(conv)) { 2379 address_visited.set(conv->_idx); // Flag as address_visited 2380 mstack.push(conv->in(1), Matcher::Pre_Visit); 2381 } else 2382 #endif 2383 mstack.push(conv, Matcher::Pre_Visit); 2384 return true; 2385 } 2386 return false; 2387 } 2388 2389 // This function identifies sub-graphs in which a 'load' node is 2390 // input to two different nodes, and such that it can be matched 2391 // with BMI instructions like blsi, blsr, etc. 2392 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2393 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2394 // refers to the same node. 2395 // 2396 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2397 // This is a temporary solution until we make DAGs expressible in ADL. 2398 template<typename ConType> 2399 class FusedPatternMatcher { 2400 Node* _op1_node; 2401 Node* _mop_node; 2402 int _con_op; 2403 2404 static int match_next(Node* n, int next_op, int next_op_idx) { 2405 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2406 return -1; 2407 } 2408 2409 if (next_op_idx == -1) { // n is commutative, try rotations 2410 if (n->in(1)->Opcode() == next_op) { 2411 return 1; 2412 } else if (n->in(2)->Opcode() == next_op) { 2413 return 2; 2414 } 2415 } else { 2416 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2417 if (n->in(next_op_idx)->Opcode() == next_op) { 2418 return next_op_idx; 2419 } 2420 } 2421 return -1; 2422 } 2423 2424 public: 2425 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2426 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2427 2428 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2429 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2430 typename ConType::NativeType con_value) { 2431 if (_op1_node->Opcode() != op1) { 2432 return false; 2433 } 2434 if (_mop_node->outcnt() > 2) { 2435 return false; 2436 } 2437 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2438 if (op1_op2_idx == -1) { 2439 return false; 2440 } 2441 // Memory operation must be the other edge 2442 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2443 2444 // Check that the mop node is really what we want 2445 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2446 Node* op2_node = _op1_node->in(op1_op2_idx); 2447 if (op2_node->outcnt() > 1) { 2448 return false; 2449 } 2450 assert(op2_node->Opcode() == op2, "Should be"); 2451 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2452 if (op2_con_idx == -1) { 2453 return false; 2454 } 2455 // Memory operation must be the other edge 2456 int op2_mop_idx = (op2_con_idx & 1) + 1; 2457 // Check that the memory operation is the same node 2458 if (op2_node->in(op2_mop_idx) == _mop_node) { 2459 // Now check the constant 2460 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2461 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2462 return true; 2463 } 2464 } 2465 } 2466 return false; 2467 } 2468 }; 2469 2470 static bool is_bmi_pattern(Node* n, Node* m) { 2471 assert(UseBMI1Instructions, "sanity"); 2472 if (n != nullptr && m != nullptr) { 2473 if (m->Opcode() == Op_LoadI) { 2474 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2475 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2476 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2477 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2478 } else if (m->Opcode() == Op_LoadL) { 2479 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2480 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2481 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2482 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2483 } 2484 } 2485 return false; 2486 } 2487 2488 // Should the matcher clone input 'm' of node 'n'? 2489 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2490 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 2491 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2492 mstack.push(m, Visit); 2493 return true; 2494 } 2495 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2496 mstack.push(m, Visit); // m = ShiftCntV 2497 return true; 2498 } 2499 if (is_encode_and_store_pattern(n, m)) { 2500 mstack.push(m, Visit); 2501 return true; 2502 } 2503 return false; 2504 } 2505 2506 // Should the Matcher clone shifts on addressing modes, expecting them 2507 // to be subsumed into complex addressing expressions or compute them 2508 // into registers? 2509 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2510 Node *off = m->in(AddPNode::Offset); 2511 if (off->is_Con()) { 2512 address_visited.test_set(m->_idx); // Flag as address_visited 2513 Node *adr = m->in(AddPNode::Address); 2514 2515 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2516 // AtomicAdd is not an addressing expression. 2517 // Cheap to find it by looking for screwy base. 2518 if (adr->is_AddP() && 2519 !adr->in(AddPNode::Base)->is_top() && 2520 !adr->in(AddPNode::Offset)->is_Con() && 2521 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2522 // Are there other uses besides address expressions? 2523 !is_visited(adr)) { 2524 address_visited.set(adr->_idx); // Flag as address_visited 2525 Node *shift = adr->in(AddPNode::Offset); 2526 if (!clone_shift(shift, this, mstack, address_visited)) { 2527 mstack.push(shift, Pre_Visit); 2528 } 2529 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2530 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2531 } else { 2532 mstack.push(adr, Pre_Visit); 2533 } 2534 2535 // Clone X+offset as it also folds into most addressing expressions 2536 mstack.push(off, Visit); 2537 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2538 return true; 2539 } else if (clone_shift(off, this, mstack, address_visited)) { 2540 address_visited.test_set(m->_idx); // Flag as address_visited 2541 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2542 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2543 return true; 2544 } 2545 return false; 2546 } 2547 2548 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2549 switch (bt) { 2550 case BoolTest::eq: 2551 return Assembler::eq; 2552 case BoolTest::ne: 2553 return Assembler::neq; 2554 case BoolTest::le: 2555 case BoolTest::ule: 2556 return Assembler::le; 2557 case BoolTest::ge: 2558 case BoolTest::uge: 2559 return Assembler::nlt; 2560 case BoolTest::lt: 2561 case BoolTest::ult: 2562 return Assembler::lt; 2563 case BoolTest::gt: 2564 case BoolTest::ugt: 2565 return Assembler::nle; 2566 default : ShouldNotReachHere(); return Assembler::_false; 2567 } 2568 } 2569 2570 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2571 switch (bt) { 2572 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2573 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2574 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2575 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2576 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2577 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2578 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2579 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2580 } 2581 } 2582 2583 // Helper methods for MachSpillCopyNode::implementation(). 2584 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2585 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2586 assert(ireg == Op_VecS || // 32bit vector 2587 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2588 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2589 "no non-adjacent vector moves" ); 2590 if (masm) { 2591 switch (ireg) { 2592 case Op_VecS: // copy whole register 2593 case Op_VecD: 2594 case Op_VecX: 2595 #ifndef _LP64 2596 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2597 #else 2598 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2599 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2600 } else { 2601 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2602 } 2603 #endif 2604 break; 2605 case Op_VecY: 2606 #ifndef _LP64 2607 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2608 #else 2609 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2610 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2611 } else { 2612 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2613 } 2614 #endif 2615 break; 2616 case Op_VecZ: 2617 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2618 break; 2619 default: 2620 ShouldNotReachHere(); 2621 } 2622 #ifndef PRODUCT 2623 } else { 2624 switch (ireg) { 2625 case Op_VecS: 2626 case Op_VecD: 2627 case Op_VecX: 2628 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2629 break; 2630 case Op_VecY: 2631 case Op_VecZ: 2632 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2633 break; 2634 default: 2635 ShouldNotReachHere(); 2636 } 2637 #endif 2638 } 2639 } 2640 2641 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2642 int stack_offset, int reg, uint ireg, outputStream* st) { 2643 if (masm) { 2644 if (is_load) { 2645 switch (ireg) { 2646 case Op_VecS: 2647 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2648 break; 2649 case Op_VecD: 2650 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2651 break; 2652 case Op_VecX: 2653 #ifndef _LP64 2654 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2655 #else 2656 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2657 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2658 } else { 2659 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2660 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2661 } 2662 #endif 2663 break; 2664 case Op_VecY: 2665 #ifndef _LP64 2666 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2667 #else 2668 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2669 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2670 } else { 2671 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2672 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2673 } 2674 #endif 2675 break; 2676 case Op_VecZ: 2677 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2678 break; 2679 default: 2680 ShouldNotReachHere(); 2681 } 2682 } else { // store 2683 switch (ireg) { 2684 case Op_VecS: 2685 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2686 break; 2687 case Op_VecD: 2688 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2689 break; 2690 case Op_VecX: 2691 #ifndef _LP64 2692 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2693 #else 2694 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2695 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2696 } 2697 else { 2698 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2699 } 2700 #endif 2701 break; 2702 case Op_VecY: 2703 #ifndef _LP64 2704 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2705 #else 2706 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2707 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2708 } 2709 else { 2710 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2711 } 2712 #endif 2713 break; 2714 case Op_VecZ: 2715 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2716 break; 2717 default: 2718 ShouldNotReachHere(); 2719 } 2720 } 2721 #ifndef PRODUCT 2722 } else { 2723 if (is_load) { 2724 switch (ireg) { 2725 case Op_VecS: 2726 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2727 break; 2728 case Op_VecD: 2729 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2730 break; 2731 case Op_VecX: 2732 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2733 break; 2734 case Op_VecY: 2735 case Op_VecZ: 2736 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2737 break; 2738 default: 2739 ShouldNotReachHere(); 2740 } 2741 } else { // store 2742 switch (ireg) { 2743 case Op_VecS: 2744 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2745 break; 2746 case Op_VecD: 2747 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2748 break; 2749 case Op_VecX: 2750 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2751 break; 2752 case Op_VecY: 2753 case Op_VecZ: 2754 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2755 break; 2756 default: 2757 ShouldNotReachHere(); 2758 } 2759 } 2760 #endif 2761 } 2762 } 2763 2764 template <class T> 2765 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2766 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2767 jvalue ele; 2768 switch (bt) { 2769 case T_BYTE: ele.b = con; break; 2770 case T_SHORT: ele.s = con; break; 2771 case T_INT: ele.i = con; break; 2772 case T_LONG: ele.j = con; break; 2773 case T_FLOAT: ele.f = con; break; 2774 case T_DOUBLE: ele.d = con; break; 2775 default: ShouldNotReachHere(); 2776 } 2777 for (int i = 0; i < len; i++) { 2778 val->append(ele); 2779 } 2780 return val; 2781 } 2782 2783 static inline jlong high_bit_set(BasicType bt) { 2784 switch (bt) { 2785 case T_BYTE: return 0x8080808080808080; 2786 case T_SHORT: return 0x8000800080008000; 2787 case T_INT: return 0x8000000080000000; 2788 case T_LONG: return 0x8000000000000000; 2789 default: 2790 ShouldNotReachHere(); 2791 return 0; 2792 } 2793 } 2794 2795 #ifndef PRODUCT 2796 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2797 st->print("nop \t# %d bytes pad for loops and calls", _count); 2798 } 2799 #endif 2800 2801 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2802 __ nop(_count); 2803 } 2804 2805 uint MachNopNode::size(PhaseRegAlloc*) const { 2806 return _count; 2807 } 2808 2809 #ifndef PRODUCT 2810 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2811 st->print("# breakpoint"); 2812 } 2813 #endif 2814 2815 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2816 __ int3(); 2817 } 2818 2819 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2820 return MachNode::size(ra_); 2821 } 2822 2823 %} 2824 2825 encode %{ 2826 2827 enc_class call_epilog %{ 2828 if (VerifyStackAtCalls) { 2829 // Check that stack depth is unchanged: find majik cookie on stack 2830 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2831 Label L; 2832 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2833 __ jccb(Assembler::equal, L); 2834 // Die if stack mismatch 2835 __ int3(); 2836 __ bind(L); 2837 } 2838 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2839 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2840 // Search for the corresponding projection, get the register and emit code that initialized it. 2841 uint con = (tf()->range_cc()->cnt() - 1); 2842 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2843 ProjNode* proj = fast_out(i)->as_Proj(); 2844 if (proj->_con == con) { 2845 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2846 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2847 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2848 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2849 __ testq(rax, rax); 2850 __ setb(Assembler::notZero, toReg); 2851 __ movzbl(toReg, toReg); 2852 if (reg->is_stack()) { 2853 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2854 __ movq(Address(rsp, st_off), toReg); 2855 } 2856 break; 2857 } 2858 } 2859 if (return_value_is_used()) { 2860 // An inline type is returned as fields in multiple registers. 2861 // Rax either contains an oop if the inline type is buffered or a pointer 2862 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2863 // if the lowest bit is set to allow C2 to use the oop after null checking. 2864 // rax &= (rax & 1) - 1 2865 __ movptr(rscratch1, rax); 2866 __ andptr(rscratch1, 0x1); 2867 __ subptr(rscratch1, 0x1); 2868 __ andptr(rax, rscratch1); 2869 } 2870 } 2871 %} 2872 2873 %} 2874 2875 // Operands for bound floating pointer register arguments 2876 operand rxmm0() %{ 2877 constraint(ALLOC_IN_RC(xmm0_reg)); 2878 match(VecX); 2879 format%{%} 2880 interface(REG_INTER); 2881 %} 2882 2883 //----------OPERANDS----------------------------------------------------------- 2884 // Operand definitions must precede instruction definitions for correct parsing 2885 // in the ADLC because operands constitute user defined types which are used in 2886 // instruction definitions. 2887 2888 // Vectors 2889 2890 // Dummy generic vector class. Should be used for all vector operands. 2891 // Replaced with vec[SDXYZ] during post-selection pass. 2892 operand vec() %{ 2893 constraint(ALLOC_IN_RC(dynamic)); 2894 match(VecX); 2895 match(VecY); 2896 match(VecZ); 2897 match(VecS); 2898 match(VecD); 2899 2900 format %{ %} 2901 interface(REG_INTER); 2902 %} 2903 2904 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2905 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2906 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2907 // runtime code generation via reg_class_dynamic. 2908 operand legVec() %{ 2909 constraint(ALLOC_IN_RC(dynamic)); 2910 match(VecX); 2911 match(VecY); 2912 match(VecZ); 2913 match(VecS); 2914 match(VecD); 2915 2916 format %{ %} 2917 interface(REG_INTER); 2918 %} 2919 2920 // Replaces vec during post-selection cleanup. See above. 2921 operand vecS() %{ 2922 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2923 match(VecS); 2924 2925 format %{ %} 2926 interface(REG_INTER); 2927 %} 2928 2929 // Replaces legVec during post-selection cleanup. See above. 2930 operand legVecS() %{ 2931 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2932 match(VecS); 2933 2934 format %{ %} 2935 interface(REG_INTER); 2936 %} 2937 2938 // Replaces vec during post-selection cleanup. See above. 2939 operand vecD() %{ 2940 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2941 match(VecD); 2942 2943 format %{ %} 2944 interface(REG_INTER); 2945 %} 2946 2947 // Replaces legVec during post-selection cleanup. See above. 2948 operand legVecD() %{ 2949 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2950 match(VecD); 2951 2952 format %{ %} 2953 interface(REG_INTER); 2954 %} 2955 2956 // Replaces vec during post-selection cleanup. See above. 2957 operand vecX() %{ 2958 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2959 match(VecX); 2960 2961 format %{ %} 2962 interface(REG_INTER); 2963 %} 2964 2965 // Replaces legVec during post-selection cleanup. See above. 2966 operand legVecX() %{ 2967 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2968 match(VecX); 2969 2970 format %{ %} 2971 interface(REG_INTER); 2972 %} 2973 2974 // Replaces vec during post-selection cleanup. See above. 2975 operand vecY() %{ 2976 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2977 match(VecY); 2978 2979 format %{ %} 2980 interface(REG_INTER); 2981 %} 2982 2983 // Replaces legVec during post-selection cleanup. See above. 2984 operand legVecY() %{ 2985 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2986 match(VecY); 2987 2988 format %{ %} 2989 interface(REG_INTER); 2990 %} 2991 2992 // Replaces vec during post-selection cleanup. See above. 2993 operand vecZ() %{ 2994 constraint(ALLOC_IN_RC(vectorz_reg)); 2995 match(VecZ); 2996 2997 format %{ %} 2998 interface(REG_INTER); 2999 %} 3000 3001 // Replaces legVec during post-selection cleanup. See above. 3002 operand legVecZ() %{ 3003 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 3004 match(VecZ); 3005 3006 format %{ %} 3007 interface(REG_INTER); 3008 %} 3009 3010 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 3011 3012 // ============================================================================ 3013 3014 instruct ShouldNotReachHere() %{ 3015 match(Halt); 3016 format %{ "stop\t# ShouldNotReachHere" %} 3017 ins_encode %{ 3018 if (is_reachable()) { 3019 __ stop(_halt_reason); 3020 } 3021 %} 3022 ins_pipe(pipe_slow); 3023 %} 3024 3025 // ============================================================================ 3026 3027 instruct addF_reg(regF dst, regF src) %{ 3028 predicate((UseSSE>=1) && (UseAVX == 0)); 3029 match(Set dst (AddF dst src)); 3030 3031 format %{ "addss $dst, $src" %} 3032 ins_cost(150); 3033 ins_encode %{ 3034 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3035 %} 3036 ins_pipe(pipe_slow); 3037 %} 3038 3039 instruct addF_mem(regF dst, memory src) %{ 3040 predicate((UseSSE>=1) && (UseAVX == 0)); 3041 match(Set dst (AddF dst (LoadF src))); 3042 3043 format %{ "addss $dst, $src" %} 3044 ins_cost(150); 3045 ins_encode %{ 3046 __ addss($dst$$XMMRegister, $src$$Address); 3047 %} 3048 ins_pipe(pipe_slow); 3049 %} 3050 3051 instruct addF_imm(regF dst, immF con) %{ 3052 predicate((UseSSE>=1) && (UseAVX == 0)); 3053 match(Set dst (AddF dst con)); 3054 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3055 ins_cost(150); 3056 ins_encode %{ 3057 __ addss($dst$$XMMRegister, $constantaddress($con)); 3058 %} 3059 ins_pipe(pipe_slow); 3060 %} 3061 3062 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3063 predicate(UseAVX > 0); 3064 match(Set dst (AddF src1 src2)); 3065 3066 format %{ "vaddss $dst, $src1, $src2" %} 3067 ins_cost(150); 3068 ins_encode %{ 3069 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3070 %} 3071 ins_pipe(pipe_slow); 3072 %} 3073 3074 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3075 predicate(UseAVX > 0); 3076 match(Set dst (AddF src1 (LoadF src2))); 3077 3078 format %{ "vaddss $dst, $src1, $src2" %} 3079 ins_cost(150); 3080 ins_encode %{ 3081 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3082 %} 3083 ins_pipe(pipe_slow); 3084 %} 3085 3086 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3087 predicate(UseAVX > 0); 3088 match(Set dst (AddF src con)); 3089 3090 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3091 ins_cost(150); 3092 ins_encode %{ 3093 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3094 %} 3095 ins_pipe(pipe_slow); 3096 %} 3097 3098 instruct addD_reg(regD dst, regD src) %{ 3099 predicate((UseSSE>=2) && (UseAVX == 0)); 3100 match(Set dst (AddD dst src)); 3101 3102 format %{ "addsd $dst, $src" %} 3103 ins_cost(150); 3104 ins_encode %{ 3105 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3106 %} 3107 ins_pipe(pipe_slow); 3108 %} 3109 3110 instruct addD_mem(regD dst, memory src) %{ 3111 predicate((UseSSE>=2) && (UseAVX == 0)); 3112 match(Set dst (AddD dst (LoadD src))); 3113 3114 format %{ "addsd $dst, $src" %} 3115 ins_cost(150); 3116 ins_encode %{ 3117 __ addsd($dst$$XMMRegister, $src$$Address); 3118 %} 3119 ins_pipe(pipe_slow); 3120 %} 3121 3122 instruct addD_imm(regD dst, immD con) %{ 3123 predicate((UseSSE>=2) && (UseAVX == 0)); 3124 match(Set dst (AddD dst con)); 3125 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3126 ins_cost(150); 3127 ins_encode %{ 3128 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3129 %} 3130 ins_pipe(pipe_slow); 3131 %} 3132 3133 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3134 predicate(UseAVX > 0); 3135 match(Set dst (AddD src1 src2)); 3136 3137 format %{ "vaddsd $dst, $src1, $src2" %} 3138 ins_cost(150); 3139 ins_encode %{ 3140 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3141 %} 3142 ins_pipe(pipe_slow); 3143 %} 3144 3145 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3146 predicate(UseAVX > 0); 3147 match(Set dst (AddD src1 (LoadD src2))); 3148 3149 format %{ "vaddsd $dst, $src1, $src2" %} 3150 ins_cost(150); 3151 ins_encode %{ 3152 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3153 %} 3154 ins_pipe(pipe_slow); 3155 %} 3156 3157 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3158 predicate(UseAVX > 0); 3159 match(Set dst (AddD src con)); 3160 3161 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3162 ins_cost(150); 3163 ins_encode %{ 3164 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3165 %} 3166 ins_pipe(pipe_slow); 3167 %} 3168 3169 instruct subF_reg(regF dst, regF src) %{ 3170 predicate((UseSSE>=1) && (UseAVX == 0)); 3171 match(Set dst (SubF dst src)); 3172 3173 format %{ "subss $dst, $src" %} 3174 ins_cost(150); 3175 ins_encode %{ 3176 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3177 %} 3178 ins_pipe(pipe_slow); 3179 %} 3180 3181 instruct subF_mem(regF dst, memory src) %{ 3182 predicate((UseSSE>=1) && (UseAVX == 0)); 3183 match(Set dst (SubF dst (LoadF src))); 3184 3185 format %{ "subss $dst, $src" %} 3186 ins_cost(150); 3187 ins_encode %{ 3188 __ subss($dst$$XMMRegister, $src$$Address); 3189 %} 3190 ins_pipe(pipe_slow); 3191 %} 3192 3193 instruct subF_imm(regF dst, immF con) %{ 3194 predicate((UseSSE>=1) && (UseAVX == 0)); 3195 match(Set dst (SubF dst con)); 3196 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3197 ins_cost(150); 3198 ins_encode %{ 3199 __ subss($dst$$XMMRegister, $constantaddress($con)); 3200 %} 3201 ins_pipe(pipe_slow); 3202 %} 3203 3204 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3205 predicate(UseAVX > 0); 3206 match(Set dst (SubF src1 src2)); 3207 3208 format %{ "vsubss $dst, $src1, $src2" %} 3209 ins_cost(150); 3210 ins_encode %{ 3211 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3212 %} 3213 ins_pipe(pipe_slow); 3214 %} 3215 3216 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3217 predicate(UseAVX > 0); 3218 match(Set dst (SubF src1 (LoadF src2))); 3219 3220 format %{ "vsubss $dst, $src1, $src2" %} 3221 ins_cost(150); 3222 ins_encode %{ 3223 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3224 %} 3225 ins_pipe(pipe_slow); 3226 %} 3227 3228 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3229 predicate(UseAVX > 0); 3230 match(Set dst (SubF src con)); 3231 3232 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3233 ins_cost(150); 3234 ins_encode %{ 3235 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3236 %} 3237 ins_pipe(pipe_slow); 3238 %} 3239 3240 instruct subD_reg(regD dst, regD src) %{ 3241 predicate((UseSSE>=2) && (UseAVX == 0)); 3242 match(Set dst (SubD dst src)); 3243 3244 format %{ "subsd $dst, $src" %} 3245 ins_cost(150); 3246 ins_encode %{ 3247 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3248 %} 3249 ins_pipe(pipe_slow); 3250 %} 3251 3252 instruct subD_mem(regD dst, memory src) %{ 3253 predicate((UseSSE>=2) && (UseAVX == 0)); 3254 match(Set dst (SubD dst (LoadD src))); 3255 3256 format %{ "subsd $dst, $src" %} 3257 ins_cost(150); 3258 ins_encode %{ 3259 __ subsd($dst$$XMMRegister, $src$$Address); 3260 %} 3261 ins_pipe(pipe_slow); 3262 %} 3263 3264 instruct subD_imm(regD dst, immD con) %{ 3265 predicate((UseSSE>=2) && (UseAVX == 0)); 3266 match(Set dst (SubD dst con)); 3267 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3268 ins_cost(150); 3269 ins_encode %{ 3270 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3271 %} 3272 ins_pipe(pipe_slow); 3273 %} 3274 3275 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3276 predicate(UseAVX > 0); 3277 match(Set dst (SubD src1 src2)); 3278 3279 format %{ "vsubsd $dst, $src1, $src2" %} 3280 ins_cost(150); 3281 ins_encode %{ 3282 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3283 %} 3284 ins_pipe(pipe_slow); 3285 %} 3286 3287 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3288 predicate(UseAVX > 0); 3289 match(Set dst (SubD src1 (LoadD src2))); 3290 3291 format %{ "vsubsd $dst, $src1, $src2" %} 3292 ins_cost(150); 3293 ins_encode %{ 3294 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3295 %} 3296 ins_pipe(pipe_slow); 3297 %} 3298 3299 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3300 predicate(UseAVX > 0); 3301 match(Set dst (SubD src con)); 3302 3303 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3304 ins_cost(150); 3305 ins_encode %{ 3306 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3307 %} 3308 ins_pipe(pipe_slow); 3309 %} 3310 3311 instruct mulF_reg(regF dst, regF src) %{ 3312 predicate((UseSSE>=1) && (UseAVX == 0)); 3313 match(Set dst (MulF dst src)); 3314 3315 format %{ "mulss $dst, $src" %} 3316 ins_cost(150); 3317 ins_encode %{ 3318 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3319 %} 3320 ins_pipe(pipe_slow); 3321 %} 3322 3323 instruct mulF_mem(regF dst, memory src) %{ 3324 predicate((UseSSE>=1) && (UseAVX == 0)); 3325 match(Set dst (MulF dst (LoadF src))); 3326 3327 format %{ "mulss $dst, $src" %} 3328 ins_cost(150); 3329 ins_encode %{ 3330 __ mulss($dst$$XMMRegister, $src$$Address); 3331 %} 3332 ins_pipe(pipe_slow); 3333 %} 3334 3335 instruct mulF_imm(regF dst, immF con) %{ 3336 predicate((UseSSE>=1) && (UseAVX == 0)); 3337 match(Set dst (MulF dst con)); 3338 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3339 ins_cost(150); 3340 ins_encode %{ 3341 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3342 %} 3343 ins_pipe(pipe_slow); 3344 %} 3345 3346 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3347 predicate(UseAVX > 0); 3348 match(Set dst (MulF src1 src2)); 3349 3350 format %{ "vmulss $dst, $src1, $src2" %} 3351 ins_cost(150); 3352 ins_encode %{ 3353 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3354 %} 3355 ins_pipe(pipe_slow); 3356 %} 3357 3358 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3359 predicate(UseAVX > 0); 3360 match(Set dst (MulF src1 (LoadF src2))); 3361 3362 format %{ "vmulss $dst, $src1, $src2" %} 3363 ins_cost(150); 3364 ins_encode %{ 3365 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3366 %} 3367 ins_pipe(pipe_slow); 3368 %} 3369 3370 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3371 predicate(UseAVX > 0); 3372 match(Set dst (MulF src con)); 3373 3374 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3375 ins_cost(150); 3376 ins_encode %{ 3377 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3378 %} 3379 ins_pipe(pipe_slow); 3380 %} 3381 3382 instruct mulD_reg(regD dst, regD src) %{ 3383 predicate((UseSSE>=2) && (UseAVX == 0)); 3384 match(Set dst (MulD dst src)); 3385 3386 format %{ "mulsd $dst, $src" %} 3387 ins_cost(150); 3388 ins_encode %{ 3389 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3390 %} 3391 ins_pipe(pipe_slow); 3392 %} 3393 3394 instruct mulD_mem(regD dst, memory src) %{ 3395 predicate((UseSSE>=2) && (UseAVX == 0)); 3396 match(Set dst (MulD dst (LoadD src))); 3397 3398 format %{ "mulsd $dst, $src" %} 3399 ins_cost(150); 3400 ins_encode %{ 3401 __ mulsd($dst$$XMMRegister, $src$$Address); 3402 %} 3403 ins_pipe(pipe_slow); 3404 %} 3405 3406 instruct mulD_imm(regD dst, immD con) %{ 3407 predicate((UseSSE>=2) && (UseAVX == 0)); 3408 match(Set dst (MulD dst con)); 3409 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3410 ins_cost(150); 3411 ins_encode %{ 3412 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3413 %} 3414 ins_pipe(pipe_slow); 3415 %} 3416 3417 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3418 predicate(UseAVX > 0); 3419 match(Set dst (MulD src1 src2)); 3420 3421 format %{ "vmulsd $dst, $src1, $src2" %} 3422 ins_cost(150); 3423 ins_encode %{ 3424 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3425 %} 3426 ins_pipe(pipe_slow); 3427 %} 3428 3429 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3430 predicate(UseAVX > 0); 3431 match(Set dst (MulD src1 (LoadD src2))); 3432 3433 format %{ "vmulsd $dst, $src1, $src2" %} 3434 ins_cost(150); 3435 ins_encode %{ 3436 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3437 %} 3438 ins_pipe(pipe_slow); 3439 %} 3440 3441 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3442 predicate(UseAVX > 0); 3443 match(Set dst (MulD src con)); 3444 3445 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3446 ins_cost(150); 3447 ins_encode %{ 3448 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3449 %} 3450 ins_pipe(pipe_slow); 3451 %} 3452 3453 instruct divF_reg(regF dst, regF src) %{ 3454 predicate((UseSSE>=1) && (UseAVX == 0)); 3455 match(Set dst (DivF dst src)); 3456 3457 format %{ "divss $dst, $src" %} 3458 ins_cost(150); 3459 ins_encode %{ 3460 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3461 %} 3462 ins_pipe(pipe_slow); 3463 %} 3464 3465 instruct divF_mem(regF dst, memory src) %{ 3466 predicate((UseSSE>=1) && (UseAVX == 0)); 3467 match(Set dst (DivF dst (LoadF src))); 3468 3469 format %{ "divss $dst, $src" %} 3470 ins_cost(150); 3471 ins_encode %{ 3472 __ divss($dst$$XMMRegister, $src$$Address); 3473 %} 3474 ins_pipe(pipe_slow); 3475 %} 3476 3477 instruct divF_imm(regF dst, immF con) %{ 3478 predicate((UseSSE>=1) && (UseAVX == 0)); 3479 match(Set dst (DivF dst con)); 3480 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3481 ins_cost(150); 3482 ins_encode %{ 3483 __ divss($dst$$XMMRegister, $constantaddress($con)); 3484 %} 3485 ins_pipe(pipe_slow); 3486 %} 3487 3488 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3489 predicate(UseAVX > 0); 3490 match(Set dst (DivF src1 src2)); 3491 3492 format %{ "vdivss $dst, $src1, $src2" %} 3493 ins_cost(150); 3494 ins_encode %{ 3495 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3496 %} 3497 ins_pipe(pipe_slow); 3498 %} 3499 3500 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3501 predicate(UseAVX > 0); 3502 match(Set dst (DivF src1 (LoadF src2))); 3503 3504 format %{ "vdivss $dst, $src1, $src2" %} 3505 ins_cost(150); 3506 ins_encode %{ 3507 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3508 %} 3509 ins_pipe(pipe_slow); 3510 %} 3511 3512 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3513 predicate(UseAVX > 0); 3514 match(Set dst (DivF src con)); 3515 3516 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3517 ins_cost(150); 3518 ins_encode %{ 3519 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3520 %} 3521 ins_pipe(pipe_slow); 3522 %} 3523 3524 instruct divD_reg(regD dst, regD src) %{ 3525 predicate((UseSSE>=2) && (UseAVX == 0)); 3526 match(Set dst (DivD dst src)); 3527 3528 format %{ "divsd $dst, $src" %} 3529 ins_cost(150); 3530 ins_encode %{ 3531 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3532 %} 3533 ins_pipe(pipe_slow); 3534 %} 3535 3536 instruct divD_mem(regD dst, memory src) %{ 3537 predicate((UseSSE>=2) && (UseAVX == 0)); 3538 match(Set dst (DivD dst (LoadD src))); 3539 3540 format %{ "divsd $dst, $src" %} 3541 ins_cost(150); 3542 ins_encode %{ 3543 __ divsd($dst$$XMMRegister, $src$$Address); 3544 %} 3545 ins_pipe(pipe_slow); 3546 %} 3547 3548 instruct divD_imm(regD dst, immD con) %{ 3549 predicate((UseSSE>=2) && (UseAVX == 0)); 3550 match(Set dst (DivD dst con)); 3551 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3552 ins_cost(150); 3553 ins_encode %{ 3554 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3555 %} 3556 ins_pipe(pipe_slow); 3557 %} 3558 3559 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3560 predicate(UseAVX > 0); 3561 match(Set dst (DivD src1 src2)); 3562 3563 format %{ "vdivsd $dst, $src1, $src2" %} 3564 ins_cost(150); 3565 ins_encode %{ 3566 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3567 %} 3568 ins_pipe(pipe_slow); 3569 %} 3570 3571 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3572 predicate(UseAVX > 0); 3573 match(Set dst (DivD src1 (LoadD src2))); 3574 3575 format %{ "vdivsd $dst, $src1, $src2" %} 3576 ins_cost(150); 3577 ins_encode %{ 3578 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3579 %} 3580 ins_pipe(pipe_slow); 3581 %} 3582 3583 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3584 predicate(UseAVX > 0); 3585 match(Set dst (DivD src con)); 3586 3587 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3588 ins_cost(150); 3589 ins_encode %{ 3590 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3591 %} 3592 ins_pipe(pipe_slow); 3593 %} 3594 3595 instruct absF_reg(regF dst) %{ 3596 predicate((UseSSE>=1) && (UseAVX == 0)); 3597 match(Set dst (AbsF dst)); 3598 ins_cost(150); 3599 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3600 ins_encode %{ 3601 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3602 %} 3603 ins_pipe(pipe_slow); 3604 %} 3605 3606 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3607 predicate(UseAVX > 0); 3608 match(Set dst (AbsF src)); 3609 ins_cost(150); 3610 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3611 ins_encode %{ 3612 int vlen_enc = Assembler::AVX_128bit; 3613 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3614 ExternalAddress(float_signmask()), vlen_enc); 3615 %} 3616 ins_pipe(pipe_slow); 3617 %} 3618 3619 instruct absD_reg(regD dst) %{ 3620 predicate((UseSSE>=2) && (UseAVX == 0)); 3621 match(Set dst (AbsD dst)); 3622 ins_cost(150); 3623 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3624 "# abs double by sign masking" %} 3625 ins_encode %{ 3626 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3627 %} 3628 ins_pipe(pipe_slow); 3629 %} 3630 3631 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3632 predicate(UseAVX > 0); 3633 match(Set dst (AbsD src)); 3634 ins_cost(150); 3635 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3636 "# abs double by sign masking" %} 3637 ins_encode %{ 3638 int vlen_enc = Assembler::AVX_128bit; 3639 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3640 ExternalAddress(double_signmask()), vlen_enc); 3641 %} 3642 ins_pipe(pipe_slow); 3643 %} 3644 3645 instruct negF_reg(regF dst) %{ 3646 predicate((UseSSE>=1) && (UseAVX == 0)); 3647 match(Set dst (NegF dst)); 3648 ins_cost(150); 3649 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3650 ins_encode %{ 3651 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3652 %} 3653 ins_pipe(pipe_slow); 3654 %} 3655 3656 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3657 predicate(UseAVX > 0); 3658 match(Set dst (NegF src)); 3659 ins_cost(150); 3660 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3661 ins_encode %{ 3662 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3663 ExternalAddress(float_signflip())); 3664 %} 3665 ins_pipe(pipe_slow); 3666 %} 3667 3668 instruct negD_reg(regD dst) %{ 3669 predicate((UseSSE>=2) && (UseAVX == 0)); 3670 match(Set dst (NegD dst)); 3671 ins_cost(150); 3672 format %{ "xorpd $dst, [0x8000000000000000]\t" 3673 "# neg double by sign flipping" %} 3674 ins_encode %{ 3675 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3676 %} 3677 ins_pipe(pipe_slow); 3678 %} 3679 3680 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3681 predicate(UseAVX > 0); 3682 match(Set dst (NegD src)); 3683 ins_cost(150); 3684 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3685 "# neg double by sign flipping" %} 3686 ins_encode %{ 3687 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3688 ExternalAddress(double_signflip())); 3689 %} 3690 ins_pipe(pipe_slow); 3691 %} 3692 3693 // sqrtss instruction needs destination register to be pre initialized for best performance 3694 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3695 instruct sqrtF_reg(regF dst) %{ 3696 predicate(UseSSE>=1); 3697 match(Set dst (SqrtF dst)); 3698 format %{ "sqrtss $dst, $dst" %} 3699 ins_encode %{ 3700 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3701 %} 3702 ins_pipe(pipe_slow); 3703 %} 3704 3705 // sqrtsd instruction needs destination register to be pre initialized for best performance 3706 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3707 instruct sqrtD_reg(regD dst) %{ 3708 predicate(UseSSE>=2); 3709 match(Set dst (SqrtD dst)); 3710 format %{ "sqrtsd $dst, $dst" %} 3711 ins_encode %{ 3712 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3713 %} 3714 ins_pipe(pipe_slow); 3715 %} 3716 3717 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3718 effect(TEMP tmp); 3719 match(Set dst (ConvF2HF src)); 3720 ins_cost(125); 3721 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3722 ins_encode %{ 3723 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3729 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3730 effect(TEMP ktmp, TEMP rtmp); 3731 match(Set mem (StoreC mem (ConvF2HF src))); 3732 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3733 ins_encode %{ 3734 __ movl($rtmp$$Register, 0x1); 3735 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3736 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3737 %} 3738 ins_pipe( pipe_slow ); 3739 %} 3740 3741 instruct vconvF2HF(vec dst, vec src) %{ 3742 match(Set dst (VectorCastF2HF src)); 3743 format %{ "vector_conv_F2HF $dst $src" %} 3744 ins_encode %{ 3745 int vlen_enc = vector_length_encoding(this, $src); 3746 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3747 %} 3748 ins_pipe( pipe_slow ); 3749 %} 3750 3751 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3752 predicate(n->as_StoreVector()->memory_size() >= 16); 3753 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3754 format %{ "vcvtps2ph $mem,$src" %} 3755 ins_encode %{ 3756 int vlen_enc = vector_length_encoding(this, $src); 3757 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3758 %} 3759 ins_pipe( pipe_slow ); 3760 %} 3761 3762 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3763 match(Set dst (ConvHF2F src)); 3764 format %{ "vcvtph2ps $dst,$src" %} 3765 ins_encode %{ 3766 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3767 %} 3768 ins_pipe( pipe_slow ); 3769 %} 3770 3771 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3772 match(Set dst (VectorCastHF2F (LoadVector mem))); 3773 format %{ "vcvtph2ps $dst,$mem" %} 3774 ins_encode %{ 3775 int vlen_enc = vector_length_encoding(this); 3776 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3777 %} 3778 ins_pipe( pipe_slow ); 3779 %} 3780 3781 instruct vconvHF2F(vec dst, vec src) %{ 3782 match(Set dst (VectorCastHF2F src)); 3783 ins_cost(125); 3784 format %{ "vector_conv_HF2F $dst,$src" %} 3785 ins_encode %{ 3786 int vlen_enc = vector_length_encoding(this); 3787 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3788 %} 3789 ins_pipe( pipe_slow ); 3790 %} 3791 3792 // ---------------------------------------- VectorReinterpret ------------------------------------ 3793 instruct reinterpret_mask(kReg dst) %{ 3794 predicate(n->bottom_type()->isa_vectmask() && 3795 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3796 match(Set dst (VectorReinterpret dst)); 3797 ins_cost(125); 3798 format %{ "vector_reinterpret $dst\t!" %} 3799 ins_encode %{ 3800 // empty 3801 %} 3802 ins_pipe( pipe_slow ); 3803 %} 3804 3805 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3806 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3807 n->bottom_type()->isa_vectmask() && 3808 n->in(1)->bottom_type()->isa_vectmask() && 3809 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3810 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3811 match(Set dst (VectorReinterpret src)); 3812 effect(TEMP xtmp); 3813 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3814 ins_encode %{ 3815 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3816 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3817 assert(src_sz == dst_sz , "src and dst size mismatch"); 3818 int vlen_enc = vector_length_encoding(src_sz); 3819 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3820 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3826 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3827 n->bottom_type()->isa_vectmask() && 3828 n->in(1)->bottom_type()->isa_vectmask() && 3829 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3830 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3831 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3832 match(Set dst (VectorReinterpret src)); 3833 effect(TEMP xtmp); 3834 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3835 ins_encode %{ 3836 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3837 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3838 assert(src_sz == dst_sz , "src and dst size mismatch"); 3839 int vlen_enc = vector_length_encoding(src_sz); 3840 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3841 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3847 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3848 n->bottom_type()->isa_vectmask() && 3849 n->in(1)->bottom_type()->isa_vectmask() && 3850 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3851 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3852 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3853 match(Set dst (VectorReinterpret src)); 3854 effect(TEMP xtmp); 3855 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3856 ins_encode %{ 3857 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3858 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3859 assert(src_sz == dst_sz , "src and dst size mismatch"); 3860 int vlen_enc = vector_length_encoding(src_sz); 3861 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3862 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3863 %} 3864 ins_pipe( pipe_slow ); 3865 %} 3866 3867 instruct reinterpret(vec dst) %{ 3868 predicate(!n->bottom_type()->isa_vectmask() && 3869 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3870 match(Set dst (VectorReinterpret dst)); 3871 ins_cost(125); 3872 format %{ "vector_reinterpret $dst\t!" %} 3873 ins_encode %{ 3874 // empty 3875 %} 3876 ins_pipe( pipe_slow ); 3877 %} 3878 3879 instruct reinterpret_expand(vec dst, vec src) %{ 3880 predicate(UseAVX == 0 && 3881 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3882 match(Set dst (VectorReinterpret src)); 3883 ins_cost(125); 3884 effect(TEMP dst); 3885 format %{ "vector_reinterpret_expand $dst,$src" %} 3886 ins_encode %{ 3887 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3888 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3889 3890 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3891 if (src_vlen_in_bytes == 4) { 3892 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3893 } else { 3894 assert(src_vlen_in_bytes == 8, ""); 3895 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3896 } 3897 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3903 predicate(UseAVX > 0 && 3904 !n->bottom_type()->isa_vectmask() && 3905 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3906 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3907 match(Set dst (VectorReinterpret src)); 3908 ins_cost(125); 3909 format %{ "vector_reinterpret_expand $dst,$src" %} 3910 ins_encode %{ 3911 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3912 %} 3913 ins_pipe( pipe_slow ); 3914 %} 3915 3916 3917 instruct vreinterpret_expand(legVec dst, vec src) %{ 3918 predicate(UseAVX > 0 && 3919 !n->bottom_type()->isa_vectmask() && 3920 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3921 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3922 match(Set dst (VectorReinterpret src)); 3923 ins_cost(125); 3924 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3925 ins_encode %{ 3926 switch (Matcher::vector_length_in_bytes(this, $src)) { 3927 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3928 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3929 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3930 default: ShouldNotReachHere(); 3931 } 3932 %} 3933 ins_pipe( pipe_slow ); 3934 %} 3935 3936 instruct reinterpret_shrink(vec dst, legVec src) %{ 3937 predicate(!n->bottom_type()->isa_vectmask() && 3938 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3939 match(Set dst (VectorReinterpret src)); 3940 ins_cost(125); 3941 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3942 ins_encode %{ 3943 switch (Matcher::vector_length_in_bytes(this)) { 3944 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3945 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3946 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3947 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3948 default: ShouldNotReachHere(); 3949 } 3950 %} 3951 ins_pipe( pipe_slow ); 3952 %} 3953 3954 // ---------------------------------------------------------------------------------------------------- 3955 3956 #ifdef _LP64 3957 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3958 match(Set dst (RoundDoubleMode src rmode)); 3959 format %{ "roundsd $dst,$src" %} 3960 ins_cost(150); 3961 ins_encode %{ 3962 assert(UseSSE >= 4, "required"); 3963 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3964 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3965 } 3966 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3967 %} 3968 ins_pipe(pipe_slow); 3969 %} 3970 3971 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3972 match(Set dst (RoundDoubleMode con rmode)); 3973 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3974 ins_cost(150); 3975 ins_encode %{ 3976 assert(UseSSE >= 4, "required"); 3977 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3978 %} 3979 ins_pipe(pipe_slow); 3980 %} 3981 3982 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3983 predicate(Matcher::vector_length(n) < 8); 3984 match(Set dst (RoundDoubleModeV src rmode)); 3985 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3986 ins_encode %{ 3987 assert(UseAVX > 0, "required"); 3988 int vlen_enc = vector_length_encoding(this); 3989 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 3994 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3995 predicate(Matcher::vector_length(n) == 8); 3996 match(Set dst (RoundDoubleModeV src rmode)); 3997 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3998 ins_encode %{ 3999 assert(UseAVX > 2, "required"); 4000 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 4001 %} 4002 ins_pipe( pipe_slow ); 4003 %} 4004 4005 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 4006 predicate(Matcher::vector_length(n) < 8); 4007 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4008 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 4009 ins_encode %{ 4010 assert(UseAVX > 0, "required"); 4011 int vlen_enc = vector_length_encoding(this); 4012 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 4013 %} 4014 ins_pipe( pipe_slow ); 4015 %} 4016 4017 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 4018 predicate(Matcher::vector_length(n) == 8); 4019 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4020 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 4021 ins_encode %{ 4022 assert(UseAVX > 2, "required"); 4023 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 4024 %} 4025 ins_pipe( pipe_slow ); 4026 %} 4027 #endif // _LP64 4028 4029 instruct onspinwait() %{ 4030 match(OnSpinWait); 4031 ins_cost(200); 4032 4033 format %{ 4034 $$template 4035 $$emit$$"pause\t! membar_onspinwait" 4036 %} 4037 ins_encode %{ 4038 __ pause(); 4039 %} 4040 ins_pipe(pipe_slow); 4041 %} 4042 4043 // a * b + c 4044 instruct fmaD_reg(regD a, regD b, regD c) %{ 4045 match(Set c (FmaD c (Binary a b))); 4046 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4047 ins_cost(150); 4048 ins_encode %{ 4049 assert(UseFMA, "Needs FMA instructions support."); 4050 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4051 %} 4052 ins_pipe( pipe_slow ); 4053 %} 4054 4055 // a * b + c 4056 instruct fmaF_reg(regF a, regF b, regF c) %{ 4057 match(Set c (FmaF c (Binary a b))); 4058 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4059 ins_cost(150); 4060 ins_encode %{ 4061 assert(UseFMA, "Needs FMA instructions support."); 4062 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 // ====================VECTOR INSTRUCTIONS===================================== 4068 4069 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4070 instruct MoveVec2Leg(legVec dst, vec src) %{ 4071 match(Set dst src); 4072 format %{ "" %} 4073 ins_encode %{ 4074 ShouldNotReachHere(); 4075 %} 4076 ins_pipe( fpu_reg_reg ); 4077 %} 4078 4079 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4080 match(Set dst src); 4081 format %{ "" %} 4082 ins_encode %{ 4083 ShouldNotReachHere(); 4084 %} 4085 ins_pipe( fpu_reg_reg ); 4086 %} 4087 4088 // ============================================================================ 4089 4090 // Load vectors generic operand pattern 4091 instruct loadV(vec dst, memory mem) %{ 4092 match(Set dst (LoadVector mem)); 4093 ins_cost(125); 4094 format %{ "load_vector $dst,$mem" %} 4095 ins_encode %{ 4096 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4097 %} 4098 ins_pipe( pipe_slow ); 4099 %} 4100 4101 // Store vectors generic operand pattern. 4102 instruct storeV(memory mem, vec src) %{ 4103 match(Set mem (StoreVector mem src)); 4104 ins_cost(145); 4105 format %{ "store_vector $mem,$src\n\t" %} 4106 ins_encode %{ 4107 switch (Matcher::vector_length_in_bytes(this, $src)) { 4108 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4109 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4110 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4111 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4112 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4113 default: ShouldNotReachHere(); 4114 } 4115 %} 4116 ins_pipe( pipe_slow ); 4117 %} 4118 4119 // ---------------------------------------- Gather ------------------------------------ 4120 4121 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4122 4123 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4124 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4125 Matcher::vector_length_in_bytes(n) <= 32); 4126 match(Set dst (LoadVectorGather mem idx)); 4127 effect(TEMP dst, TEMP tmp, TEMP mask); 4128 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 4129 ins_encode %{ 4130 int vlen_enc = vector_length_encoding(this); 4131 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4132 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4133 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4134 __ lea($tmp$$Register, $mem$$Address); 4135 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4136 %} 4137 ins_pipe( pipe_slow ); 4138 %} 4139 4140 4141 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4142 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4143 !is_subword_type(Matcher::vector_element_basic_type(n))); 4144 match(Set dst (LoadVectorGather mem idx)); 4145 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4146 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4147 ins_encode %{ 4148 int vlen_enc = vector_length_encoding(this); 4149 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4150 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4151 __ lea($tmp$$Register, $mem$$Address); 4152 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4153 %} 4154 ins_pipe( pipe_slow ); 4155 %} 4156 4157 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4158 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4159 !is_subword_type(Matcher::vector_element_basic_type(n))); 4160 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4161 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4162 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4163 ins_encode %{ 4164 assert(UseAVX > 2, "sanity"); 4165 int vlen_enc = vector_length_encoding(this); 4166 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4167 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4168 // Note: Since gather instruction partially updates the opmask register used 4169 // for predication hense moving mask operand to a temporary. 4170 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4171 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4172 __ lea($tmp$$Register, $mem$$Address); 4173 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4174 %} 4175 ins_pipe( pipe_slow ); 4176 %} 4177 4178 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4179 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4180 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4181 effect(TEMP tmp, TEMP rtmp); 4182 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %} 4183 ins_encode %{ 4184 int vlen_enc = vector_length_encoding(this); 4185 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4186 __ lea($tmp$$Register, $mem$$Address); 4187 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4188 %} 4189 ins_pipe( pipe_slow ); 4190 %} 4191 4192 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4193 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4194 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4195 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4196 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4197 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4198 ins_encode %{ 4199 int vlen_enc = vector_length_encoding(this); 4200 int vector_len = Matcher::vector_length(this); 4201 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4202 __ lea($tmp$$Register, $mem$$Address); 4203 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4204 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4205 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4206 %} 4207 ins_pipe( pipe_slow ); 4208 %} 4209 4210 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4211 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4212 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4213 effect(TEMP tmp, TEMP rtmp, KILL cr); 4214 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4215 ins_encode %{ 4216 int vlen_enc = vector_length_encoding(this); 4217 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4218 __ lea($tmp$$Register, $mem$$Address); 4219 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4220 %} 4221 ins_pipe( pipe_slow ); 4222 %} 4223 4224 4225 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4226 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4227 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4228 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4229 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4230 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4231 ins_encode %{ 4232 int vlen_enc = vector_length_encoding(this); 4233 int vector_len = Matcher::vector_length(this); 4234 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4235 __ lea($tmp$$Register, $mem$$Address); 4236 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4237 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4238 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4239 %} 4240 ins_pipe( pipe_slow ); 4241 %} 4242 4243 4244 #ifdef _LP64 4245 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4246 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4247 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4248 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4249 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4250 ins_encode %{ 4251 int vlen_enc = vector_length_encoding(this); 4252 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4253 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4254 __ lea($tmp$$Register, $mem$$Address); 4255 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4256 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4257 %} 4258 ins_pipe( pipe_slow ); 4259 %} 4260 4261 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4262 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4263 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4264 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4265 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4266 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4267 ins_encode %{ 4268 int vlen_enc = vector_length_encoding(this); 4269 int vector_len = Matcher::vector_length(this); 4270 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4271 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4272 __ lea($tmp$$Register, $mem$$Address); 4273 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4274 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4275 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4276 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4277 %} 4278 ins_pipe( pipe_slow ); 4279 %} 4280 4281 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4282 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4283 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4284 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4285 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4286 ins_encode %{ 4287 int vlen_enc = vector_length_encoding(this); 4288 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4289 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4290 __ lea($tmp$$Register, $mem$$Address); 4291 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4292 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4293 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4299 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4300 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4301 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4302 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4303 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4304 ins_encode %{ 4305 int vlen_enc = vector_length_encoding(this); 4306 int vector_len = Matcher::vector_length(this); 4307 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4308 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4309 __ lea($tmp$$Register, $mem$$Address); 4310 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4311 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4312 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4313 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4314 %} 4315 ins_pipe( pipe_slow ); 4316 %} 4317 4318 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4319 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4320 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4321 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4322 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4323 ins_encode %{ 4324 int vlen_enc = vector_length_encoding(this); 4325 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4326 __ lea($tmp$$Register, $mem$$Address); 4327 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4328 if (elem_bt == T_SHORT) { 4329 __ movl($mask_idx$$Register, 0x55555555); 4330 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4331 } 4332 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4333 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4334 %} 4335 ins_pipe( pipe_slow ); 4336 %} 4337 4338 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4339 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4340 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4341 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4342 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4343 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4344 ins_encode %{ 4345 int vlen_enc = vector_length_encoding(this); 4346 int vector_len = Matcher::vector_length(this); 4347 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4348 __ lea($tmp$$Register, $mem$$Address); 4349 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4350 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4351 if (elem_bt == T_SHORT) { 4352 __ movl($mask_idx$$Register, 0x55555555); 4353 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4354 } 4355 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4356 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4357 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 4362 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4363 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4364 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4365 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4366 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4367 ins_encode %{ 4368 int vlen_enc = vector_length_encoding(this); 4369 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4370 __ lea($tmp$$Register, $mem$$Address); 4371 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4372 if (elem_bt == T_SHORT) { 4373 __ movl($mask_idx$$Register, 0x55555555); 4374 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4375 } 4376 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4377 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4378 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4379 %} 4380 ins_pipe( pipe_slow ); 4381 %} 4382 4383 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4384 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4385 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4386 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4387 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4388 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4389 ins_encode %{ 4390 int vlen_enc = vector_length_encoding(this); 4391 int vector_len = Matcher::vector_length(this); 4392 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4393 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4394 __ lea($tmp$$Register, $mem$$Address); 4395 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4396 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4397 if (elem_bt == T_SHORT) { 4398 __ movl($mask_idx$$Register, 0x55555555); 4399 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4400 } 4401 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4402 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4403 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4404 %} 4405 ins_pipe( pipe_slow ); 4406 %} 4407 #endif 4408 4409 // ====================Scatter======================================= 4410 4411 // Scatter INT, LONG, FLOAT, DOUBLE 4412 4413 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4414 predicate(UseAVX > 2); 4415 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4416 effect(TEMP tmp, TEMP ktmp); 4417 format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %} 4418 ins_encode %{ 4419 int vlen_enc = vector_length_encoding(this, $src); 4420 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4421 4422 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4423 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4424 4425 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4426 __ lea($tmp$$Register, $mem$$Address); 4427 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4428 %} 4429 ins_pipe( pipe_slow ); 4430 %} 4431 4432 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4433 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4434 effect(TEMP tmp, TEMP ktmp); 4435 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4436 ins_encode %{ 4437 int vlen_enc = vector_length_encoding(this, $src); 4438 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4439 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4440 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4441 // Note: Since scatter instruction partially updates the opmask register used 4442 // for predication hense moving mask operand to a temporary. 4443 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4444 __ lea($tmp$$Register, $mem$$Address); 4445 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4446 %} 4447 ins_pipe( pipe_slow ); 4448 %} 4449 4450 // ====================REPLICATE======================================= 4451 4452 // Replicate byte scalar to be vector 4453 instruct vReplB_reg(vec dst, rRegI src) %{ 4454 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4455 match(Set dst (Replicate src)); 4456 format %{ "replicateB $dst,$src" %} 4457 ins_encode %{ 4458 uint vlen = Matcher::vector_length(this); 4459 if (UseAVX >= 2) { 4460 int vlen_enc = vector_length_encoding(this); 4461 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4462 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4463 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4464 } else { 4465 __ movdl($dst$$XMMRegister, $src$$Register); 4466 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4467 } 4468 } else { 4469 assert(UseAVX < 2, ""); 4470 __ movdl($dst$$XMMRegister, $src$$Register); 4471 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4472 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4473 if (vlen >= 16) { 4474 assert(vlen == 16, ""); 4475 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4476 } 4477 } 4478 %} 4479 ins_pipe( pipe_slow ); 4480 %} 4481 4482 instruct ReplB_mem(vec dst, memory mem) %{ 4483 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4484 match(Set dst (Replicate (LoadB mem))); 4485 format %{ "replicateB $dst,$mem" %} 4486 ins_encode %{ 4487 int vlen_enc = vector_length_encoding(this); 4488 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4489 %} 4490 ins_pipe( pipe_slow ); 4491 %} 4492 4493 // ====================ReplicateS======================================= 4494 4495 instruct vReplS_reg(vec dst, rRegI src) %{ 4496 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4497 match(Set dst (Replicate src)); 4498 format %{ "replicateS $dst,$src" %} 4499 ins_encode %{ 4500 uint vlen = Matcher::vector_length(this); 4501 int vlen_enc = vector_length_encoding(this); 4502 if (UseAVX >= 2) { 4503 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4504 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4505 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4506 } else { 4507 __ movdl($dst$$XMMRegister, $src$$Register); 4508 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4509 } 4510 } else { 4511 assert(UseAVX < 2, ""); 4512 __ movdl($dst$$XMMRegister, $src$$Register); 4513 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4514 if (vlen >= 8) { 4515 assert(vlen == 8, ""); 4516 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4517 } 4518 } 4519 %} 4520 ins_pipe( pipe_slow ); 4521 %} 4522 4523 instruct ReplS_mem(vec dst, memory mem) %{ 4524 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4525 match(Set dst (Replicate (LoadS mem))); 4526 format %{ "replicateS $dst,$mem" %} 4527 ins_encode %{ 4528 int vlen_enc = vector_length_encoding(this); 4529 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4530 %} 4531 ins_pipe( pipe_slow ); 4532 %} 4533 4534 // ====================ReplicateI======================================= 4535 4536 instruct ReplI_reg(vec dst, rRegI src) %{ 4537 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4538 match(Set dst (Replicate src)); 4539 format %{ "replicateI $dst,$src" %} 4540 ins_encode %{ 4541 uint vlen = Matcher::vector_length(this); 4542 int vlen_enc = vector_length_encoding(this); 4543 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4544 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4545 } else if (VM_Version::supports_avx2()) { 4546 __ movdl($dst$$XMMRegister, $src$$Register); 4547 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4548 } else { 4549 __ movdl($dst$$XMMRegister, $src$$Register); 4550 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4551 } 4552 %} 4553 ins_pipe( pipe_slow ); 4554 %} 4555 4556 instruct ReplI_mem(vec dst, memory mem) %{ 4557 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4558 match(Set dst (Replicate (LoadI mem))); 4559 format %{ "replicateI $dst,$mem" %} 4560 ins_encode %{ 4561 int vlen_enc = vector_length_encoding(this); 4562 if (VM_Version::supports_avx2()) { 4563 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4564 } else if (VM_Version::supports_avx()) { 4565 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4566 } else { 4567 __ movdl($dst$$XMMRegister, $mem$$Address); 4568 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4569 } 4570 %} 4571 ins_pipe( pipe_slow ); 4572 %} 4573 4574 instruct ReplI_imm(vec dst, immI con) %{ 4575 predicate(Matcher::is_non_long_integral_vector(n)); 4576 match(Set dst (Replicate con)); 4577 format %{ "replicateI $dst,$con" %} 4578 ins_encode %{ 4579 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4580 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4581 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 8) / 4582 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4583 BasicType bt = Matcher::vector_element_basic_type(this); 4584 int vlen = Matcher::vector_length_in_bytes(this); 4585 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4586 %} 4587 ins_pipe( pipe_slow ); 4588 %} 4589 4590 // Replicate scalar zero to be vector 4591 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4592 predicate(Matcher::is_non_long_integral_vector(n)); 4593 match(Set dst (Replicate zero)); 4594 format %{ "replicateI $dst,$zero" %} 4595 ins_encode %{ 4596 int vlen_enc = vector_length_encoding(this); 4597 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4598 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4599 } else { 4600 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4601 } 4602 %} 4603 ins_pipe( fpu_reg_reg ); 4604 %} 4605 4606 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4607 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4608 match(Set dst (Replicate con)); 4609 format %{ "vallones $dst" %} 4610 ins_encode %{ 4611 int vector_len = vector_length_encoding(this); 4612 __ vallones($dst$$XMMRegister, vector_len); 4613 %} 4614 ins_pipe( pipe_slow ); 4615 %} 4616 4617 // ====================ReplicateL======================================= 4618 4619 #ifdef _LP64 4620 // Replicate long (8 byte) scalar to be vector 4621 instruct ReplL_reg(vec dst, rRegL src) %{ 4622 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4623 match(Set dst (Replicate src)); 4624 format %{ "replicateL $dst,$src" %} 4625 ins_encode %{ 4626 int vlen = Matcher::vector_length(this); 4627 int vlen_enc = vector_length_encoding(this); 4628 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4629 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4630 } else if (VM_Version::supports_avx2()) { 4631 __ movdq($dst$$XMMRegister, $src$$Register); 4632 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4633 } else { 4634 __ movdq($dst$$XMMRegister, $src$$Register); 4635 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4636 } 4637 %} 4638 ins_pipe( pipe_slow ); 4639 %} 4640 #else // _LP64 4641 // Replicate long (8 byte) scalar to be vector 4642 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4643 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4644 match(Set dst (Replicate src)); 4645 effect(TEMP dst, USE src, TEMP tmp); 4646 format %{ "replicateL $dst,$src" %} 4647 ins_encode %{ 4648 uint vlen = Matcher::vector_length(this); 4649 if (vlen == 2) { 4650 __ movdl($dst$$XMMRegister, $src$$Register); 4651 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4652 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4653 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4654 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4655 int vlen_enc = Assembler::AVX_256bit; 4656 __ movdl($dst$$XMMRegister, $src$$Register); 4657 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4658 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4659 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4660 } else { 4661 __ movdl($dst$$XMMRegister, $src$$Register); 4662 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4663 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4664 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4665 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4666 } 4667 %} 4668 ins_pipe( pipe_slow ); 4669 %} 4670 4671 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4672 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4673 match(Set dst (Replicate src)); 4674 effect(TEMP dst, USE src, TEMP tmp); 4675 format %{ "replicateL $dst,$src" %} 4676 ins_encode %{ 4677 if (VM_Version::supports_avx512vl()) { 4678 __ movdl($dst$$XMMRegister, $src$$Register); 4679 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4680 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4681 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4682 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4683 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4684 } else { 4685 int vlen_enc = Assembler::AVX_512bit; 4686 __ movdl($dst$$XMMRegister, $src$$Register); 4687 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4688 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4689 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4690 } 4691 %} 4692 ins_pipe( pipe_slow ); 4693 %} 4694 #endif // _LP64 4695 4696 instruct ReplL_mem(vec dst, memory mem) %{ 4697 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4698 match(Set dst (Replicate (LoadL mem))); 4699 format %{ "replicateL $dst,$mem" %} 4700 ins_encode %{ 4701 int vlen_enc = vector_length_encoding(this); 4702 if (VM_Version::supports_avx2()) { 4703 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4704 } else if (VM_Version::supports_sse3()) { 4705 __ movddup($dst$$XMMRegister, $mem$$Address); 4706 } else { 4707 __ movq($dst$$XMMRegister, $mem$$Address); 4708 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4709 } 4710 %} 4711 ins_pipe( pipe_slow ); 4712 %} 4713 4714 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4715 instruct ReplL_imm(vec dst, immL con) %{ 4716 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4717 match(Set dst (Replicate con)); 4718 format %{ "replicateL $dst,$con" %} 4719 ins_encode %{ 4720 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4721 int vlen = Matcher::vector_length_in_bytes(this); 4722 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4723 %} 4724 ins_pipe( pipe_slow ); 4725 %} 4726 4727 instruct ReplL_zero(vec dst, immL0 zero) %{ 4728 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4729 match(Set dst (Replicate zero)); 4730 format %{ "replicateL $dst,$zero" %} 4731 ins_encode %{ 4732 int vlen_enc = vector_length_encoding(this); 4733 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4734 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4735 } else { 4736 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4737 } 4738 %} 4739 ins_pipe( fpu_reg_reg ); 4740 %} 4741 4742 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4743 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4744 match(Set dst (Replicate con)); 4745 format %{ "vallones $dst" %} 4746 ins_encode %{ 4747 int vector_len = vector_length_encoding(this); 4748 __ vallones($dst$$XMMRegister, vector_len); 4749 %} 4750 ins_pipe( pipe_slow ); 4751 %} 4752 4753 // ====================ReplicateF======================================= 4754 4755 instruct vReplF_reg(vec dst, vlRegF src) %{ 4756 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4757 match(Set dst (Replicate src)); 4758 format %{ "replicateF $dst,$src" %} 4759 ins_encode %{ 4760 uint vlen = Matcher::vector_length(this); 4761 int vlen_enc = vector_length_encoding(this); 4762 if (vlen <= 4) { 4763 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4764 } else if (VM_Version::supports_avx2()) { 4765 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4766 } else { 4767 assert(vlen == 8, "sanity"); 4768 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4769 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4770 } 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 instruct ReplF_reg(vec dst, vlRegF src) %{ 4776 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4777 match(Set dst (Replicate src)); 4778 format %{ "replicateF $dst,$src" %} 4779 ins_encode %{ 4780 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4781 %} 4782 ins_pipe( pipe_slow ); 4783 %} 4784 4785 instruct ReplF_mem(vec dst, memory mem) %{ 4786 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4787 match(Set dst (Replicate (LoadF mem))); 4788 format %{ "replicateF $dst,$mem" %} 4789 ins_encode %{ 4790 int vlen_enc = vector_length_encoding(this); 4791 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4792 %} 4793 ins_pipe( pipe_slow ); 4794 %} 4795 4796 // Replicate float scalar immediate to be vector by loading from const table. 4797 instruct ReplF_imm(vec dst, immF con) %{ 4798 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4799 match(Set dst (Replicate con)); 4800 format %{ "replicateF $dst,$con" %} 4801 ins_encode %{ 4802 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4803 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2)); 4804 int vlen = Matcher::vector_length_in_bytes(this); 4805 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4806 %} 4807 ins_pipe( pipe_slow ); 4808 %} 4809 4810 instruct ReplF_zero(vec dst, immF0 zero) %{ 4811 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4812 match(Set dst (Replicate zero)); 4813 format %{ "replicateF $dst,$zero" %} 4814 ins_encode %{ 4815 int vlen_enc = vector_length_encoding(this); 4816 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4817 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4818 } else { 4819 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4820 } 4821 %} 4822 ins_pipe( fpu_reg_reg ); 4823 %} 4824 4825 // ====================ReplicateD======================================= 4826 4827 // Replicate double (8 bytes) scalar to be vector 4828 instruct vReplD_reg(vec dst, vlRegD src) %{ 4829 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4830 match(Set dst (Replicate src)); 4831 format %{ "replicateD $dst,$src" %} 4832 ins_encode %{ 4833 uint vlen = Matcher::vector_length(this); 4834 int vlen_enc = vector_length_encoding(this); 4835 if (vlen <= 2) { 4836 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4837 } else if (VM_Version::supports_avx2()) { 4838 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4839 } else { 4840 assert(vlen == 4, "sanity"); 4841 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4842 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4843 } 4844 %} 4845 ins_pipe( pipe_slow ); 4846 %} 4847 4848 instruct ReplD_reg(vec dst, vlRegD src) %{ 4849 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4850 match(Set dst (Replicate src)); 4851 format %{ "replicateD $dst,$src" %} 4852 ins_encode %{ 4853 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4854 %} 4855 ins_pipe( pipe_slow ); 4856 %} 4857 4858 instruct ReplD_mem(vec dst, memory mem) %{ 4859 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4860 match(Set dst (Replicate (LoadD mem))); 4861 format %{ "replicateD $dst,$mem" %} 4862 ins_encode %{ 4863 if (Matcher::vector_length(this) >= 4) { 4864 int vlen_enc = vector_length_encoding(this); 4865 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4866 } else { 4867 __ movddup($dst$$XMMRegister, $mem$$Address); 4868 } 4869 %} 4870 ins_pipe( pipe_slow ); 4871 %} 4872 4873 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4874 instruct ReplD_imm(vec dst, immD con) %{ 4875 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4876 match(Set dst (Replicate con)); 4877 format %{ "replicateD $dst,$con" %} 4878 ins_encode %{ 4879 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4880 int vlen = Matcher::vector_length_in_bytes(this); 4881 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4882 %} 4883 ins_pipe( pipe_slow ); 4884 %} 4885 4886 instruct ReplD_zero(vec dst, immD0 zero) %{ 4887 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4888 match(Set dst (Replicate zero)); 4889 format %{ "replicateD $dst,$zero" %} 4890 ins_encode %{ 4891 int vlen_enc = vector_length_encoding(this); 4892 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4893 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4894 } else { 4895 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4896 } 4897 %} 4898 ins_pipe( fpu_reg_reg ); 4899 %} 4900 4901 // ====================VECTOR INSERT======================================= 4902 4903 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4904 predicate(Matcher::vector_length_in_bytes(n) < 32); 4905 match(Set dst (VectorInsert (Binary dst val) idx)); 4906 format %{ "vector_insert $dst,$val,$idx" %} 4907 ins_encode %{ 4908 assert(UseSSE >= 4, "required"); 4909 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4910 4911 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4912 4913 assert(is_integral_type(elem_bt), ""); 4914 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4915 4916 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4922 predicate(Matcher::vector_length_in_bytes(n) == 32); 4923 match(Set dst (VectorInsert (Binary src val) idx)); 4924 effect(TEMP vtmp); 4925 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4926 ins_encode %{ 4927 int vlen_enc = Assembler::AVX_256bit; 4928 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4929 int elem_per_lane = 16/type2aelembytes(elem_bt); 4930 int log2epr = log2(elem_per_lane); 4931 4932 assert(is_integral_type(elem_bt), "sanity"); 4933 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4934 4935 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4936 uint y_idx = ($idx$$constant >> log2epr) & 1; 4937 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4938 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4939 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4940 %} 4941 ins_pipe( pipe_slow ); 4942 %} 4943 4944 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4945 predicate(Matcher::vector_length_in_bytes(n) == 64); 4946 match(Set dst (VectorInsert (Binary src val) idx)); 4947 effect(TEMP vtmp); 4948 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4949 ins_encode %{ 4950 assert(UseAVX > 2, "sanity"); 4951 4952 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4953 int elem_per_lane = 16/type2aelembytes(elem_bt); 4954 int log2epr = log2(elem_per_lane); 4955 4956 assert(is_integral_type(elem_bt), ""); 4957 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4958 4959 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4960 uint y_idx = ($idx$$constant >> log2epr) & 3; 4961 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4962 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4963 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4964 %} 4965 ins_pipe( pipe_slow ); 4966 %} 4967 4968 #ifdef _LP64 4969 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4970 predicate(Matcher::vector_length(n) == 2); 4971 match(Set dst (VectorInsert (Binary dst val) idx)); 4972 format %{ "vector_insert $dst,$val,$idx" %} 4973 ins_encode %{ 4974 assert(UseSSE >= 4, "required"); 4975 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4976 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4977 4978 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4979 %} 4980 ins_pipe( pipe_slow ); 4981 %} 4982 4983 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4984 predicate(Matcher::vector_length(n) == 4); 4985 match(Set dst (VectorInsert (Binary src val) idx)); 4986 effect(TEMP vtmp); 4987 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4988 ins_encode %{ 4989 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4990 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4991 4992 uint x_idx = $idx$$constant & right_n_bits(1); 4993 uint y_idx = ($idx$$constant >> 1) & 1; 4994 int vlen_enc = Assembler::AVX_256bit; 4995 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4996 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4997 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4998 %} 4999 ins_pipe( pipe_slow ); 5000 %} 5001 5002 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 5003 predicate(Matcher::vector_length(n) == 8); 5004 match(Set dst (VectorInsert (Binary src val) idx)); 5005 effect(TEMP vtmp); 5006 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5007 ins_encode %{ 5008 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 5009 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5010 5011 uint x_idx = $idx$$constant & right_n_bits(1); 5012 uint y_idx = ($idx$$constant >> 1) & 3; 5013 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5014 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5015 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5016 %} 5017 ins_pipe( pipe_slow ); 5018 %} 5019 #endif 5020 5021 instruct insertF(vec dst, regF val, immU8 idx) %{ 5022 predicate(Matcher::vector_length(n) < 8); 5023 match(Set dst (VectorInsert (Binary dst val) idx)); 5024 format %{ "vector_insert $dst,$val,$idx" %} 5025 ins_encode %{ 5026 assert(UseSSE >= 4, "sanity"); 5027 5028 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5029 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5030 5031 uint x_idx = $idx$$constant & right_n_bits(2); 5032 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5033 %} 5034 ins_pipe( pipe_slow ); 5035 %} 5036 5037 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 5038 predicate(Matcher::vector_length(n) >= 8); 5039 match(Set dst (VectorInsert (Binary src val) idx)); 5040 effect(TEMP vtmp); 5041 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5042 ins_encode %{ 5043 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5044 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5045 5046 int vlen = Matcher::vector_length(this); 5047 uint x_idx = $idx$$constant & right_n_bits(2); 5048 if (vlen == 8) { 5049 uint y_idx = ($idx$$constant >> 2) & 1; 5050 int vlen_enc = Assembler::AVX_256bit; 5051 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5052 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5053 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5054 } else { 5055 assert(vlen == 16, "sanity"); 5056 uint y_idx = ($idx$$constant >> 2) & 3; 5057 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5058 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5059 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5060 } 5061 %} 5062 ins_pipe( pipe_slow ); 5063 %} 5064 5065 #ifdef _LP64 5066 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5067 predicate(Matcher::vector_length(n) == 2); 5068 match(Set dst (VectorInsert (Binary dst val) idx)); 5069 effect(TEMP tmp); 5070 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5071 ins_encode %{ 5072 assert(UseSSE >= 4, "sanity"); 5073 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5074 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5075 5076 __ movq($tmp$$Register, $val$$XMMRegister); 5077 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5078 %} 5079 ins_pipe( pipe_slow ); 5080 %} 5081 5082 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5083 predicate(Matcher::vector_length(n) == 4); 5084 match(Set dst (VectorInsert (Binary src val) idx)); 5085 effect(TEMP vtmp, TEMP tmp); 5086 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5087 ins_encode %{ 5088 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5089 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5090 5091 uint x_idx = $idx$$constant & right_n_bits(1); 5092 uint y_idx = ($idx$$constant >> 1) & 1; 5093 int vlen_enc = Assembler::AVX_256bit; 5094 __ movq($tmp$$Register, $val$$XMMRegister); 5095 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5096 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5097 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5098 %} 5099 ins_pipe( pipe_slow ); 5100 %} 5101 5102 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5103 predicate(Matcher::vector_length(n) == 8); 5104 match(Set dst (VectorInsert (Binary src val) idx)); 5105 effect(TEMP tmp, TEMP vtmp); 5106 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5107 ins_encode %{ 5108 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5109 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5110 5111 uint x_idx = $idx$$constant & right_n_bits(1); 5112 uint y_idx = ($idx$$constant >> 1) & 3; 5113 __ movq($tmp$$Register, $val$$XMMRegister); 5114 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5115 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5116 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5117 %} 5118 ins_pipe( pipe_slow ); 5119 %} 5120 #endif 5121 5122 // ====================REDUCTION ARITHMETIC======================================= 5123 5124 // =======================Int Reduction========================================== 5125 5126 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5127 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 5128 match(Set dst (AddReductionVI src1 src2)); 5129 match(Set dst (MulReductionVI src1 src2)); 5130 match(Set dst (AndReductionV src1 src2)); 5131 match(Set dst ( OrReductionV src1 src2)); 5132 match(Set dst (XorReductionV src1 src2)); 5133 match(Set dst (MinReductionV src1 src2)); 5134 match(Set dst (MaxReductionV src1 src2)); 5135 effect(TEMP vtmp1, TEMP vtmp2); 5136 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5137 ins_encode %{ 5138 int opcode = this->ideal_Opcode(); 5139 int vlen = Matcher::vector_length(this, $src2); 5140 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5141 %} 5142 ins_pipe( pipe_slow ); 5143 %} 5144 5145 // =======================Long Reduction========================================== 5146 5147 #ifdef _LP64 5148 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5149 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5150 match(Set dst (AddReductionVL src1 src2)); 5151 match(Set dst (MulReductionVL src1 src2)); 5152 match(Set dst (AndReductionV src1 src2)); 5153 match(Set dst ( OrReductionV src1 src2)); 5154 match(Set dst (XorReductionV src1 src2)); 5155 match(Set dst (MinReductionV src1 src2)); 5156 match(Set dst (MaxReductionV src1 src2)); 5157 effect(TEMP vtmp1, TEMP vtmp2); 5158 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5159 ins_encode %{ 5160 int opcode = this->ideal_Opcode(); 5161 int vlen = Matcher::vector_length(this, $src2); 5162 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5163 %} 5164 ins_pipe( pipe_slow ); 5165 %} 5166 5167 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5168 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5169 match(Set dst (AddReductionVL src1 src2)); 5170 match(Set dst (MulReductionVL src1 src2)); 5171 match(Set dst (AndReductionV src1 src2)); 5172 match(Set dst ( OrReductionV src1 src2)); 5173 match(Set dst (XorReductionV src1 src2)); 5174 match(Set dst (MinReductionV src1 src2)); 5175 match(Set dst (MaxReductionV src1 src2)); 5176 effect(TEMP vtmp1, TEMP vtmp2); 5177 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5178 ins_encode %{ 5179 int opcode = this->ideal_Opcode(); 5180 int vlen = Matcher::vector_length(this, $src2); 5181 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5182 %} 5183 ins_pipe( pipe_slow ); 5184 %} 5185 #endif // _LP64 5186 5187 // =======================Float Reduction========================================== 5188 5189 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5190 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5191 match(Set dst (AddReductionVF dst src)); 5192 match(Set dst (MulReductionVF dst src)); 5193 effect(TEMP dst, TEMP vtmp); 5194 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5195 ins_encode %{ 5196 int opcode = this->ideal_Opcode(); 5197 int vlen = Matcher::vector_length(this, $src); 5198 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5199 %} 5200 ins_pipe( pipe_slow ); 5201 %} 5202 5203 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5204 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5205 match(Set dst (AddReductionVF dst src)); 5206 match(Set dst (MulReductionVF dst src)); 5207 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5208 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5209 ins_encode %{ 5210 int opcode = this->ideal_Opcode(); 5211 int vlen = Matcher::vector_length(this, $src); 5212 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5213 %} 5214 ins_pipe( pipe_slow ); 5215 %} 5216 5217 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5218 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5219 match(Set dst (AddReductionVF dst src)); 5220 match(Set dst (MulReductionVF dst src)); 5221 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5222 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5223 ins_encode %{ 5224 int opcode = this->ideal_Opcode(); 5225 int vlen = Matcher::vector_length(this, $src); 5226 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 5232 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5233 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5234 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5235 // src1 contains reduction identity 5236 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5237 match(Set dst (AddReductionVF src1 src2)); 5238 match(Set dst (MulReductionVF src1 src2)); 5239 effect(TEMP dst); 5240 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5241 ins_encode %{ 5242 int opcode = this->ideal_Opcode(); 5243 int vlen = Matcher::vector_length(this, $src2); 5244 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5245 %} 5246 ins_pipe( pipe_slow ); 5247 %} 5248 5249 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5250 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5251 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5252 // src1 contains reduction identity 5253 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5254 match(Set dst (AddReductionVF src1 src2)); 5255 match(Set dst (MulReductionVF src1 src2)); 5256 effect(TEMP dst, TEMP vtmp); 5257 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5258 ins_encode %{ 5259 int opcode = this->ideal_Opcode(); 5260 int vlen = Matcher::vector_length(this, $src2); 5261 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5267 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5268 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5269 // src1 contains reduction identity 5270 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5271 match(Set dst (AddReductionVF src1 src2)); 5272 match(Set dst (MulReductionVF src1 src2)); 5273 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5274 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5275 ins_encode %{ 5276 int opcode = this->ideal_Opcode(); 5277 int vlen = Matcher::vector_length(this, $src2); 5278 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5279 %} 5280 ins_pipe( pipe_slow ); 5281 %} 5282 5283 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5284 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5285 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5286 // src1 contains reduction identity 5287 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5288 match(Set dst (AddReductionVF src1 src2)); 5289 match(Set dst (MulReductionVF src1 src2)); 5290 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5291 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5292 ins_encode %{ 5293 int opcode = this->ideal_Opcode(); 5294 int vlen = Matcher::vector_length(this, $src2); 5295 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5296 %} 5297 ins_pipe( pipe_slow ); 5298 %} 5299 5300 // =======================Double Reduction========================================== 5301 5302 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5303 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5304 match(Set dst (AddReductionVD dst src)); 5305 match(Set dst (MulReductionVD dst src)); 5306 effect(TEMP dst, TEMP vtmp); 5307 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5308 ins_encode %{ 5309 int opcode = this->ideal_Opcode(); 5310 int vlen = Matcher::vector_length(this, $src); 5311 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5312 %} 5313 ins_pipe( pipe_slow ); 5314 %} 5315 5316 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5317 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5318 match(Set dst (AddReductionVD dst src)); 5319 match(Set dst (MulReductionVD dst src)); 5320 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5321 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5322 ins_encode %{ 5323 int opcode = this->ideal_Opcode(); 5324 int vlen = Matcher::vector_length(this, $src); 5325 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5326 %} 5327 ins_pipe( pipe_slow ); 5328 %} 5329 5330 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5331 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5332 match(Set dst (AddReductionVD dst src)); 5333 match(Set dst (MulReductionVD dst src)); 5334 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5335 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5336 ins_encode %{ 5337 int opcode = this->ideal_Opcode(); 5338 int vlen = Matcher::vector_length(this, $src); 5339 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5340 %} 5341 ins_pipe( pipe_slow ); 5342 %} 5343 5344 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5345 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5346 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5347 // src1 contains reduction identity 5348 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5349 match(Set dst (AddReductionVD src1 src2)); 5350 match(Set dst (MulReductionVD src1 src2)); 5351 effect(TEMP dst); 5352 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5353 ins_encode %{ 5354 int opcode = this->ideal_Opcode(); 5355 int vlen = Matcher::vector_length(this, $src2); 5356 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5357 %} 5358 ins_pipe( pipe_slow ); 5359 %} 5360 5361 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5362 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5363 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5364 // src1 contains reduction identity 5365 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5366 match(Set dst (AddReductionVD src1 src2)); 5367 match(Set dst (MulReductionVD src1 src2)); 5368 effect(TEMP dst, TEMP vtmp); 5369 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5370 ins_encode %{ 5371 int opcode = this->ideal_Opcode(); 5372 int vlen = Matcher::vector_length(this, $src2); 5373 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5374 %} 5375 ins_pipe( pipe_slow ); 5376 %} 5377 5378 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5379 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5380 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5381 // src1 contains reduction identity 5382 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5383 match(Set dst (AddReductionVD src1 src2)); 5384 match(Set dst (MulReductionVD src1 src2)); 5385 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5386 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5387 ins_encode %{ 5388 int opcode = this->ideal_Opcode(); 5389 int vlen = Matcher::vector_length(this, $src2); 5390 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5391 %} 5392 ins_pipe( pipe_slow ); 5393 %} 5394 5395 // =======================Byte Reduction========================================== 5396 5397 #ifdef _LP64 5398 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5399 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5400 match(Set dst (AddReductionVI src1 src2)); 5401 match(Set dst (AndReductionV src1 src2)); 5402 match(Set dst ( OrReductionV src1 src2)); 5403 match(Set dst (XorReductionV src1 src2)); 5404 match(Set dst (MinReductionV src1 src2)); 5405 match(Set dst (MaxReductionV src1 src2)); 5406 effect(TEMP vtmp1, TEMP vtmp2); 5407 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5408 ins_encode %{ 5409 int opcode = this->ideal_Opcode(); 5410 int vlen = Matcher::vector_length(this, $src2); 5411 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5412 %} 5413 ins_pipe( pipe_slow ); 5414 %} 5415 5416 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5417 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5418 match(Set dst (AddReductionVI src1 src2)); 5419 match(Set dst (AndReductionV src1 src2)); 5420 match(Set dst ( OrReductionV src1 src2)); 5421 match(Set dst (XorReductionV src1 src2)); 5422 match(Set dst (MinReductionV src1 src2)); 5423 match(Set dst (MaxReductionV src1 src2)); 5424 effect(TEMP vtmp1, TEMP vtmp2); 5425 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5426 ins_encode %{ 5427 int opcode = this->ideal_Opcode(); 5428 int vlen = Matcher::vector_length(this, $src2); 5429 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5430 %} 5431 ins_pipe( pipe_slow ); 5432 %} 5433 #endif 5434 5435 // =======================Short Reduction========================================== 5436 5437 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5438 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5439 match(Set dst (AddReductionVI src1 src2)); 5440 match(Set dst (MulReductionVI src1 src2)); 5441 match(Set dst (AndReductionV src1 src2)); 5442 match(Set dst ( OrReductionV src1 src2)); 5443 match(Set dst (XorReductionV src1 src2)); 5444 match(Set dst (MinReductionV src1 src2)); 5445 match(Set dst (MaxReductionV src1 src2)); 5446 effect(TEMP vtmp1, TEMP vtmp2); 5447 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5448 ins_encode %{ 5449 int opcode = this->ideal_Opcode(); 5450 int vlen = Matcher::vector_length(this, $src2); 5451 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5452 %} 5453 ins_pipe( pipe_slow ); 5454 %} 5455 5456 // =======================Mul Reduction========================================== 5457 5458 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5459 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5460 Matcher::vector_length(n->in(2)) <= 32); // src2 5461 match(Set dst (MulReductionVI src1 src2)); 5462 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5463 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5464 ins_encode %{ 5465 int opcode = this->ideal_Opcode(); 5466 int vlen = Matcher::vector_length(this, $src2); 5467 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5468 %} 5469 ins_pipe( pipe_slow ); 5470 %} 5471 5472 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5473 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5474 Matcher::vector_length(n->in(2)) == 64); // src2 5475 match(Set dst (MulReductionVI src1 src2)); 5476 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5477 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5478 ins_encode %{ 5479 int opcode = this->ideal_Opcode(); 5480 int vlen = Matcher::vector_length(this, $src2); 5481 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5482 %} 5483 ins_pipe( pipe_slow ); 5484 %} 5485 5486 //--------------------Min/Max Float Reduction -------------------- 5487 // Float Min Reduction 5488 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5489 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5490 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5491 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5492 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5493 Matcher::vector_length(n->in(2)) == 2); 5494 match(Set dst (MinReductionV src1 src2)); 5495 match(Set dst (MaxReductionV src1 src2)); 5496 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5497 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5498 ins_encode %{ 5499 assert(UseAVX > 0, "sanity"); 5500 5501 int opcode = this->ideal_Opcode(); 5502 int vlen = Matcher::vector_length(this, $src2); 5503 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5504 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5505 %} 5506 ins_pipe( pipe_slow ); 5507 %} 5508 5509 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5510 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5511 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5512 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5513 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5514 Matcher::vector_length(n->in(2)) >= 4); 5515 match(Set dst (MinReductionV src1 src2)); 5516 match(Set dst (MaxReductionV src1 src2)); 5517 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5518 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5519 ins_encode %{ 5520 assert(UseAVX > 0, "sanity"); 5521 5522 int opcode = this->ideal_Opcode(); 5523 int vlen = Matcher::vector_length(this, $src2); 5524 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5525 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5526 %} 5527 ins_pipe( pipe_slow ); 5528 %} 5529 5530 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5531 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5532 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5533 Matcher::vector_length(n->in(2)) == 2); 5534 match(Set dst (MinReductionV dst src)); 5535 match(Set dst (MaxReductionV dst src)); 5536 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5537 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5538 ins_encode %{ 5539 assert(UseAVX > 0, "sanity"); 5540 5541 int opcode = this->ideal_Opcode(); 5542 int vlen = Matcher::vector_length(this, $src); 5543 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5544 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5545 %} 5546 ins_pipe( pipe_slow ); 5547 %} 5548 5549 5550 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5551 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5552 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5553 Matcher::vector_length(n->in(2)) >= 4); 5554 match(Set dst (MinReductionV dst src)); 5555 match(Set dst (MaxReductionV dst src)); 5556 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5557 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5558 ins_encode %{ 5559 assert(UseAVX > 0, "sanity"); 5560 5561 int opcode = this->ideal_Opcode(); 5562 int vlen = Matcher::vector_length(this, $src); 5563 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5564 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5565 %} 5566 ins_pipe( pipe_slow ); 5567 %} 5568 5569 5570 //--------------------Min Double Reduction -------------------- 5571 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5572 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5573 rFlagsReg cr) %{ 5574 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5575 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5576 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5577 Matcher::vector_length(n->in(2)) == 2); 5578 match(Set dst (MinReductionV src1 src2)); 5579 match(Set dst (MaxReductionV src1 src2)); 5580 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5581 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5582 ins_encode %{ 5583 assert(UseAVX > 0, "sanity"); 5584 5585 int opcode = this->ideal_Opcode(); 5586 int vlen = Matcher::vector_length(this, $src2); 5587 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5588 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5589 %} 5590 ins_pipe( pipe_slow ); 5591 %} 5592 5593 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5594 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5595 rFlagsReg cr) %{ 5596 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5597 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5598 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5599 Matcher::vector_length(n->in(2)) >= 4); 5600 match(Set dst (MinReductionV src1 src2)); 5601 match(Set dst (MaxReductionV src1 src2)); 5602 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5603 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5604 ins_encode %{ 5605 assert(UseAVX > 0, "sanity"); 5606 5607 int opcode = this->ideal_Opcode(); 5608 int vlen = Matcher::vector_length(this, $src2); 5609 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5610 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5611 %} 5612 ins_pipe( pipe_slow ); 5613 %} 5614 5615 5616 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5617 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5618 rFlagsReg cr) %{ 5619 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5620 Matcher::vector_length(n->in(2)) == 2); 5621 match(Set dst (MinReductionV dst src)); 5622 match(Set dst (MaxReductionV dst src)); 5623 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5624 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5625 ins_encode %{ 5626 assert(UseAVX > 0, "sanity"); 5627 5628 int opcode = this->ideal_Opcode(); 5629 int vlen = Matcher::vector_length(this, $src); 5630 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5631 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct minmax_reductionD_av(legRegD dst, legVec src, 5637 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5638 rFlagsReg cr) %{ 5639 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5640 Matcher::vector_length(n->in(2)) >= 4); 5641 match(Set dst (MinReductionV dst src)); 5642 match(Set dst (MaxReductionV dst src)); 5643 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5644 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5645 ins_encode %{ 5646 assert(UseAVX > 0, "sanity"); 5647 5648 int opcode = this->ideal_Opcode(); 5649 int vlen = Matcher::vector_length(this, $src); 5650 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5651 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5652 %} 5653 ins_pipe( pipe_slow ); 5654 %} 5655 5656 // ====================VECTOR ARITHMETIC======================================= 5657 5658 // --------------------------------- ADD -------------------------------------- 5659 5660 // Bytes vector add 5661 instruct vaddB(vec dst, vec src) %{ 5662 predicate(UseAVX == 0); 5663 match(Set dst (AddVB dst src)); 5664 format %{ "paddb $dst,$src\t! add packedB" %} 5665 ins_encode %{ 5666 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5667 %} 5668 ins_pipe( pipe_slow ); 5669 %} 5670 5671 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5672 predicate(UseAVX > 0); 5673 match(Set dst (AddVB src1 src2)); 5674 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5675 ins_encode %{ 5676 int vlen_enc = vector_length_encoding(this); 5677 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5678 %} 5679 ins_pipe( pipe_slow ); 5680 %} 5681 5682 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5683 predicate((UseAVX > 0) && 5684 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5685 match(Set dst (AddVB src (LoadVector mem))); 5686 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5687 ins_encode %{ 5688 int vlen_enc = vector_length_encoding(this); 5689 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 // Shorts/Chars vector add 5695 instruct vaddS(vec dst, vec src) %{ 5696 predicate(UseAVX == 0); 5697 match(Set dst (AddVS dst src)); 5698 format %{ "paddw $dst,$src\t! add packedS" %} 5699 ins_encode %{ 5700 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5706 predicate(UseAVX > 0); 5707 match(Set dst (AddVS src1 src2)); 5708 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5709 ins_encode %{ 5710 int vlen_enc = vector_length_encoding(this); 5711 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5717 predicate((UseAVX > 0) && 5718 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5719 match(Set dst (AddVS src (LoadVector mem))); 5720 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5721 ins_encode %{ 5722 int vlen_enc = vector_length_encoding(this); 5723 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5724 %} 5725 ins_pipe( pipe_slow ); 5726 %} 5727 5728 // Integers vector add 5729 instruct vaddI(vec dst, vec src) %{ 5730 predicate(UseAVX == 0); 5731 match(Set dst (AddVI dst src)); 5732 format %{ "paddd $dst,$src\t! add packedI" %} 5733 ins_encode %{ 5734 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5740 predicate(UseAVX > 0); 5741 match(Set dst (AddVI src1 src2)); 5742 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5743 ins_encode %{ 5744 int vlen_enc = vector_length_encoding(this); 5745 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5746 %} 5747 ins_pipe( pipe_slow ); 5748 %} 5749 5750 5751 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5752 predicate((UseAVX > 0) && 5753 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5754 match(Set dst (AddVI src (LoadVector mem))); 5755 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5756 ins_encode %{ 5757 int vlen_enc = vector_length_encoding(this); 5758 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5759 %} 5760 ins_pipe( pipe_slow ); 5761 %} 5762 5763 // Longs vector add 5764 instruct vaddL(vec dst, vec src) %{ 5765 predicate(UseAVX == 0); 5766 match(Set dst (AddVL dst src)); 5767 format %{ "paddq $dst,$src\t! add packedL" %} 5768 ins_encode %{ 5769 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5770 %} 5771 ins_pipe( pipe_slow ); 5772 %} 5773 5774 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5775 predicate(UseAVX > 0); 5776 match(Set dst (AddVL src1 src2)); 5777 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5778 ins_encode %{ 5779 int vlen_enc = vector_length_encoding(this); 5780 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5781 %} 5782 ins_pipe( pipe_slow ); 5783 %} 5784 5785 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5786 predicate((UseAVX > 0) && 5787 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5788 match(Set dst (AddVL src (LoadVector mem))); 5789 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5790 ins_encode %{ 5791 int vlen_enc = vector_length_encoding(this); 5792 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5793 %} 5794 ins_pipe( pipe_slow ); 5795 %} 5796 5797 // Floats vector add 5798 instruct vaddF(vec dst, vec src) %{ 5799 predicate(UseAVX == 0); 5800 match(Set dst (AddVF dst src)); 5801 format %{ "addps $dst,$src\t! add packedF" %} 5802 ins_encode %{ 5803 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5804 %} 5805 ins_pipe( pipe_slow ); 5806 %} 5807 5808 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5809 predicate(UseAVX > 0); 5810 match(Set dst (AddVF src1 src2)); 5811 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5812 ins_encode %{ 5813 int vlen_enc = vector_length_encoding(this); 5814 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5815 %} 5816 ins_pipe( pipe_slow ); 5817 %} 5818 5819 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5820 predicate((UseAVX > 0) && 5821 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5822 match(Set dst (AddVF src (LoadVector mem))); 5823 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5824 ins_encode %{ 5825 int vlen_enc = vector_length_encoding(this); 5826 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5827 %} 5828 ins_pipe( pipe_slow ); 5829 %} 5830 5831 // Doubles vector add 5832 instruct vaddD(vec dst, vec src) %{ 5833 predicate(UseAVX == 0); 5834 match(Set dst (AddVD dst src)); 5835 format %{ "addpd $dst,$src\t! add packedD" %} 5836 ins_encode %{ 5837 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5838 %} 5839 ins_pipe( pipe_slow ); 5840 %} 5841 5842 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5843 predicate(UseAVX > 0); 5844 match(Set dst (AddVD src1 src2)); 5845 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5846 ins_encode %{ 5847 int vlen_enc = vector_length_encoding(this); 5848 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5849 %} 5850 ins_pipe( pipe_slow ); 5851 %} 5852 5853 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5854 predicate((UseAVX > 0) && 5855 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5856 match(Set dst (AddVD src (LoadVector mem))); 5857 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5858 ins_encode %{ 5859 int vlen_enc = vector_length_encoding(this); 5860 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5861 %} 5862 ins_pipe( pipe_slow ); 5863 %} 5864 5865 // --------------------------------- SUB -------------------------------------- 5866 5867 // Bytes vector sub 5868 instruct vsubB(vec dst, vec src) %{ 5869 predicate(UseAVX == 0); 5870 match(Set dst (SubVB dst src)); 5871 format %{ "psubb $dst,$src\t! sub packedB" %} 5872 ins_encode %{ 5873 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5874 %} 5875 ins_pipe( pipe_slow ); 5876 %} 5877 5878 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5879 predicate(UseAVX > 0); 5880 match(Set dst (SubVB src1 src2)); 5881 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5882 ins_encode %{ 5883 int vlen_enc = vector_length_encoding(this); 5884 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5885 %} 5886 ins_pipe( pipe_slow ); 5887 %} 5888 5889 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5890 predicate((UseAVX > 0) && 5891 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5892 match(Set dst (SubVB src (LoadVector mem))); 5893 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5894 ins_encode %{ 5895 int vlen_enc = vector_length_encoding(this); 5896 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5897 %} 5898 ins_pipe( pipe_slow ); 5899 %} 5900 5901 // Shorts/Chars vector sub 5902 instruct vsubS(vec dst, vec src) %{ 5903 predicate(UseAVX == 0); 5904 match(Set dst (SubVS dst src)); 5905 format %{ "psubw $dst,$src\t! sub packedS" %} 5906 ins_encode %{ 5907 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5908 %} 5909 ins_pipe( pipe_slow ); 5910 %} 5911 5912 5913 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5914 predicate(UseAVX > 0); 5915 match(Set dst (SubVS src1 src2)); 5916 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5917 ins_encode %{ 5918 int vlen_enc = vector_length_encoding(this); 5919 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5925 predicate((UseAVX > 0) && 5926 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5927 match(Set dst (SubVS src (LoadVector mem))); 5928 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5929 ins_encode %{ 5930 int vlen_enc = vector_length_encoding(this); 5931 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5932 %} 5933 ins_pipe( pipe_slow ); 5934 %} 5935 5936 // Integers vector sub 5937 instruct vsubI(vec dst, vec src) %{ 5938 predicate(UseAVX == 0); 5939 match(Set dst (SubVI dst src)); 5940 format %{ "psubd $dst,$src\t! sub packedI" %} 5941 ins_encode %{ 5942 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5943 %} 5944 ins_pipe( pipe_slow ); 5945 %} 5946 5947 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5948 predicate(UseAVX > 0); 5949 match(Set dst (SubVI src1 src2)); 5950 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5951 ins_encode %{ 5952 int vlen_enc = vector_length_encoding(this); 5953 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5954 %} 5955 ins_pipe( pipe_slow ); 5956 %} 5957 5958 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5959 predicate((UseAVX > 0) && 5960 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5961 match(Set dst (SubVI src (LoadVector mem))); 5962 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5963 ins_encode %{ 5964 int vlen_enc = vector_length_encoding(this); 5965 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5966 %} 5967 ins_pipe( pipe_slow ); 5968 %} 5969 5970 // Longs vector sub 5971 instruct vsubL(vec dst, vec src) %{ 5972 predicate(UseAVX == 0); 5973 match(Set dst (SubVL dst src)); 5974 format %{ "psubq $dst,$src\t! sub packedL" %} 5975 ins_encode %{ 5976 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5977 %} 5978 ins_pipe( pipe_slow ); 5979 %} 5980 5981 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5982 predicate(UseAVX > 0); 5983 match(Set dst (SubVL src1 src2)); 5984 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5985 ins_encode %{ 5986 int vlen_enc = vector_length_encoding(this); 5987 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5988 %} 5989 ins_pipe( pipe_slow ); 5990 %} 5991 5992 5993 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5994 predicate((UseAVX > 0) && 5995 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5996 match(Set dst (SubVL src (LoadVector mem))); 5997 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5998 ins_encode %{ 5999 int vlen_enc = vector_length_encoding(this); 6000 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6001 %} 6002 ins_pipe( pipe_slow ); 6003 %} 6004 6005 // Floats vector sub 6006 instruct vsubF(vec dst, vec src) %{ 6007 predicate(UseAVX == 0); 6008 match(Set dst (SubVF dst src)); 6009 format %{ "subps $dst,$src\t! sub packedF" %} 6010 ins_encode %{ 6011 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6012 %} 6013 ins_pipe( pipe_slow ); 6014 %} 6015 6016 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 6017 predicate(UseAVX > 0); 6018 match(Set dst (SubVF src1 src2)); 6019 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 6020 ins_encode %{ 6021 int vlen_enc = vector_length_encoding(this); 6022 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6023 %} 6024 ins_pipe( pipe_slow ); 6025 %} 6026 6027 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6028 predicate((UseAVX > 0) && 6029 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6030 match(Set dst (SubVF src (LoadVector mem))); 6031 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6032 ins_encode %{ 6033 int vlen_enc = vector_length_encoding(this); 6034 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6035 %} 6036 ins_pipe( pipe_slow ); 6037 %} 6038 6039 // Doubles vector sub 6040 instruct vsubD(vec dst, vec src) %{ 6041 predicate(UseAVX == 0); 6042 match(Set dst (SubVD dst src)); 6043 format %{ "subpd $dst,$src\t! sub packedD" %} 6044 ins_encode %{ 6045 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6046 %} 6047 ins_pipe( pipe_slow ); 6048 %} 6049 6050 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6051 predicate(UseAVX > 0); 6052 match(Set dst (SubVD src1 src2)); 6053 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6054 ins_encode %{ 6055 int vlen_enc = vector_length_encoding(this); 6056 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6057 %} 6058 ins_pipe( pipe_slow ); 6059 %} 6060 6061 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6062 predicate((UseAVX > 0) && 6063 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6064 match(Set dst (SubVD src (LoadVector mem))); 6065 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6066 ins_encode %{ 6067 int vlen_enc = vector_length_encoding(this); 6068 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6069 %} 6070 ins_pipe( pipe_slow ); 6071 %} 6072 6073 // --------------------------------- MUL -------------------------------------- 6074 6075 // Byte vector mul 6076 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6077 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6078 match(Set dst (MulVB src1 src2)); 6079 effect(TEMP dst, TEMP xtmp); 6080 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6081 ins_encode %{ 6082 assert(UseSSE > 3, "required"); 6083 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6084 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6085 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6086 __ psllw($dst$$XMMRegister, 8); 6087 __ psrlw($dst$$XMMRegister, 8); 6088 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6089 %} 6090 ins_pipe( pipe_slow ); 6091 %} 6092 6093 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6094 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6095 match(Set dst (MulVB src1 src2)); 6096 effect(TEMP dst, TEMP xtmp); 6097 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6098 ins_encode %{ 6099 assert(UseSSE > 3, "required"); 6100 // Odd-index elements 6101 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6102 __ psrlw($dst$$XMMRegister, 8); 6103 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6104 __ psrlw($xtmp$$XMMRegister, 8); 6105 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6106 __ psllw($dst$$XMMRegister, 8); 6107 // Even-index elements 6108 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6109 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6110 __ psllw($xtmp$$XMMRegister, 8); 6111 __ psrlw($xtmp$$XMMRegister, 8); 6112 // Combine 6113 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6114 %} 6115 ins_pipe( pipe_slow ); 6116 %} 6117 6118 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6119 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6120 match(Set dst (MulVB src1 src2)); 6121 effect(TEMP xtmp1, TEMP xtmp2); 6122 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6123 ins_encode %{ 6124 int vlen_enc = vector_length_encoding(this); 6125 // Odd-index elements 6126 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6127 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6128 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6129 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6130 // Even-index elements 6131 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6132 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6133 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6134 // Combine 6135 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6136 %} 6137 ins_pipe( pipe_slow ); 6138 %} 6139 6140 // Shorts/Chars vector mul 6141 instruct vmulS(vec dst, vec src) %{ 6142 predicate(UseAVX == 0); 6143 match(Set dst (MulVS dst src)); 6144 format %{ "pmullw $dst,$src\t! mul packedS" %} 6145 ins_encode %{ 6146 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6147 %} 6148 ins_pipe( pipe_slow ); 6149 %} 6150 6151 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6152 predicate(UseAVX > 0); 6153 match(Set dst (MulVS src1 src2)); 6154 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6155 ins_encode %{ 6156 int vlen_enc = vector_length_encoding(this); 6157 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6158 %} 6159 ins_pipe( pipe_slow ); 6160 %} 6161 6162 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6163 predicate((UseAVX > 0) && 6164 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6165 match(Set dst (MulVS src (LoadVector mem))); 6166 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 6167 ins_encode %{ 6168 int vlen_enc = vector_length_encoding(this); 6169 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6170 %} 6171 ins_pipe( pipe_slow ); 6172 %} 6173 6174 // Integers vector mul 6175 instruct vmulI(vec dst, vec src) %{ 6176 predicate(UseAVX == 0); 6177 match(Set dst (MulVI dst src)); 6178 format %{ "pmulld $dst,$src\t! mul packedI" %} 6179 ins_encode %{ 6180 assert(UseSSE > 3, "required"); 6181 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6187 predicate(UseAVX > 0); 6188 match(Set dst (MulVI src1 src2)); 6189 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6190 ins_encode %{ 6191 int vlen_enc = vector_length_encoding(this); 6192 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6193 %} 6194 ins_pipe( pipe_slow ); 6195 %} 6196 6197 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6198 predicate((UseAVX > 0) && 6199 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6200 match(Set dst (MulVI src (LoadVector mem))); 6201 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6202 ins_encode %{ 6203 int vlen_enc = vector_length_encoding(this); 6204 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6205 %} 6206 ins_pipe( pipe_slow ); 6207 %} 6208 6209 // Longs vector mul 6210 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6211 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6212 VM_Version::supports_avx512dq()) || 6213 VM_Version::supports_avx512vldq()); 6214 match(Set dst (MulVL src1 src2)); 6215 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6216 ins_encode %{ 6217 assert(UseAVX > 2, "required"); 6218 int vlen_enc = vector_length_encoding(this); 6219 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6220 %} 6221 ins_pipe( pipe_slow ); 6222 %} 6223 6224 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6225 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6226 VM_Version::supports_avx512dq()) || 6227 (Matcher::vector_length_in_bytes(n) > 8 && 6228 VM_Version::supports_avx512vldq())); 6229 match(Set dst (MulVL src (LoadVector mem))); 6230 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6231 ins_encode %{ 6232 assert(UseAVX > 2, "required"); 6233 int vlen_enc = vector_length_encoding(this); 6234 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6235 %} 6236 ins_pipe( pipe_slow ); 6237 %} 6238 6239 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6240 predicate(UseAVX == 0); 6241 match(Set dst (MulVL src1 src2)); 6242 effect(TEMP dst, TEMP xtmp); 6243 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6244 ins_encode %{ 6245 assert(VM_Version::supports_sse4_1(), "required"); 6246 // Get the lo-hi products, only the lower 32 bits is in concerns 6247 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6248 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6249 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6250 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6251 __ psllq($dst$$XMMRegister, 32); 6252 // Get the lo-lo products 6253 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6254 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6255 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6256 %} 6257 ins_pipe( pipe_slow ); 6258 %} 6259 6260 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6261 predicate(UseAVX > 0 && 6262 ((Matcher::vector_length_in_bytes(n) == 64 && 6263 !VM_Version::supports_avx512dq()) || 6264 (Matcher::vector_length_in_bytes(n) < 64 && 6265 !VM_Version::supports_avx512vldq()))); 6266 match(Set dst (MulVL src1 src2)); 6267 effect(TEMP xtmp1, TEMP xtmp2); 6268 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6269 ins_encode %{ 6270 int vlen_enc = vector_length_encoding(this); 6271 // Get the lo-hi products, only the lower 32 bits is in concerns 6272 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6273 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6274 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6275 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6276 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6277 // Get the lo-lo products 6278 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6279 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6280 %} 6281 ins_pipe( pipe_slow ); 6282 %} 6283 6284 // Floats vector mul 6285 instruct vmulF(vec dst, vec src) %{ 6286 predicate(UseAVX == 0); 6287 match(Set dst (MulVF dst src)); 6288 format %{ "mulps $dst,$src\t! mul packedF" %} 6289 ins_encode %{ 6290 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6296 predicate(UseAVX > 0); 6297 match(Set dst (MulVF src1 src2)); 6298 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 6299 ins_encode %{ 6300 int vlen_enc = vector_length_encoding(this); 6301 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6302 %} 6303 ins_pipe( pipe_slow ); 6304 %} 6305 6306 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6307 predicate((UseAVX > 0) && 6308 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6309 match(Set dst (MulVF src (LoadVector mem))); 6310 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6311 ins_encode %{ 6312 int vlen_enc = vector_length_encoding(this); 6313 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6314 %} 6315 ins_pipe( pipe_slow ); 6316 %} 6317 6318 // Doubles vector mul 6319 instruct vmulD(vec dst, vec src) %{ 6320 predicate(UseAVX == 0); 6321 match(Set dst (MulVD dst src)); 6322 format %{ "mulpd $dst,$src\t! mul packedD" %} 6323 ins_encode %{ 6324 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6325 %} 6326 ins_pipe( pipe_slow ); 6327 %} 6328 6329 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6330 predicate(UseAVX > 0); 6331 match(Set dst (MulVD src1 src2)); 6332 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6333 ins_encode %{ 6334 int vlen_enc = vector_length_encoding(this); 6335 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6336 %} 6337 ins_pipe( pipe_slow ); 6338 %} 6339 6340 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6341 predicate((UseAVX > 0) && 6342 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6343 match(Set dst (MulVD src (LoadVector mem))); 6344 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6345 ins_encode %{ 6346 int vlen_enc = vector_length_encoding(this); 6347 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6348 %} 6349 ins_pipe( pipe_slow ); 6350 %} 6351 6352 // --------------------------------- DIV -------------------------------------- 6353 6354 // Floats vector div 6355 instruct vdivF(vec dst, vec src) %{ 6356 predicate(UseAVX == 0); 6357 match(Set dst (DivVF dst src)); 6358 format %{ "divps $dst,$src\t! div packedF" %} 6359 ins_encode %{ 6360 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6361 %} 6362 ins_pipe( pipe_slow ); 6363 %} 6364 6365 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6366 predicate(UseAVX > 0); 6367 match(Set dst (DivVF src1 src2)); 6368 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6369 ins_encode %{ 6370 int vlen_enc = vector_length_encoding(this); 6371 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6372 %} 6373 ins_pipe( pipe_slow ); 6374 %} 6375 6376 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6377 predicate((UseAVX > 0) && 6378 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6379 match(Set dst (DivVF src (LoadVector mem))); 6380 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6381 ins_encode %{ 6382 int vlen_enc = vector_length_encoding(this); 6383 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6384 %} 6385 ins_pipe( pipe_slow ); 6386 %} 6387 6388 // Doubles vector div 6389 instruct vdivD(vec dst, vec src) %{ 6390 predicate(UseAVX == 0); 6391 match(Set dst (DivVD dst src)); 6392 format %{ "divpd $dst,$src\t! div packedD" %} 6393 ins_encode %{ 6394 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6395 %} 6396 ins_pipe( pipe_slow ); 6397 %} 6398 6399 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6400 predicate(UseAVX > 0); 6401 match(Set dst (DivVD src1 src2)); 6402 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6403 ins_encode %{ 6404 int vlen_enc = vector_length_encoding(this); 6405 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6406 %} 6407 ins_pipe( pipe_slow ); 6408 %} 6409 6410 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6411 predicate((UseAVX > 0) && 6412 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6413 match(Set dst (DivVD src (LoadVector mem))); 6414 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6415 ins_encode %{ 6416 int vlen_enc = vector_length_encoding(this); 6417 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6418 %} 6419 ins_pipe( pipe_slow ); 6420 %} 6421 6422 // ------------------------------ MinMax --------------------------------------- 6423 6424 // Byte, Short, Int vector Min/Max 6425 instruct minmax_reg_sse(vec dst, vec src) %{ 6426 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6427 UseAVX == 0); 6428 match(Set dst (MinV dst src)); 6429 match(Set dst (MaxV dst src)); 6430 format %{ "vector_minmax $dst,$src\t! " %} 6431 ins_encode %{ 6432 assert(UseSSE >= 4, "required"); 6433 6434 int opcode = this->ideal_Opcode(); 6435 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6436 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6437 %} 6438 ins_pipe( pipe_slow ); 6439 %} 6440 6441 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6442 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6443 UseAVX > 0); 6444 match(Set dst (MinV src1 src2)); 6445 match(Set dst (MaxV src1 src2)); 6446 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6447 ins_encode %{ 6448 int opcode = this->ideal_Opcode(); 6449 int vlen_enc = vector_length_encoding(this); 6450 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6451 6452 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6453 %} 6454 ins_pipe( pipe_slow ); 6455 %} 6456 6457 // Long vector Min/Max 6458 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6459 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6460 UseAVX == 0); 6461 match(Set dst (MinV dst src)); 6462 match(Set dst (MaxV src dst)); 6463 effect(TEMP dst, TEMP tmp); 6464 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6465 ins_encode %{ 6466 assert(UseSSE >= 4, "required"); 6467 6468 int opcode = this->ideal_Opcode(); 6469 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6470 assert(elem_bt == T_LONG, "sanity"); 6471 6472 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6473 %} 6474 ins_pipe( pipe_slow ); 6475 %} 6476 6477 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6478 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6479 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6480 match(Set dst (MinV src1 src2)); 6481 match(Set dst (MaxV src1 src2)); 6482 effect(TEMP dst); 6483 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6484 ins_encode %{ 6485 int vlen_enc = vector_length_encoding(this); 6486 int opcode = this->ideal_Opcode(); 6487 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6488 assert(elem_bt == T_LONG, "sanity"); 6489 6490 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6491 %} 6492 ins_pipe( pipe_slow ); 6493 %} 6494 6495 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6496 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6497 Matcher::vector_element_basic_type(n) == T_LONG); 6498 match(Set dst (MinV src1 src2)); 6499 match(Set dst (MaxV src1 src2)); 6500 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6501 ins_encode %{ 6502 assert(UseAVX > 2, "required"); 6503 6504 int vlen_enc = vector_length_encoding(this); 6505 int opcode = this->ideal_Opcode(); 6506 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6507 assert(elem_bt == T_LONG, "sanity"); 6508 6509 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6510 %} 6511 ins_pipe( pipe_slow ); 6512 %} 6513 6514 // Float/Double vector Min/Max 6515 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6516 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6517 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6518 UseAVX > 0); 6519 match(Set dst (MinV a b)); 6520 match(Set dst (MaxV a b)); 6521 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6522 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6523 ins_encode %{ 6524 assert(UseAVX > 0, "required"); 6525 6526 int opcode = this->ideal_Opcode(); 6527 int vlen_enc = vector_length_encoding(this); 6528 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6529 6530 __ vminmax_fp(opcode, elem_bt, 6531 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6532 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6533 %} 6534 ins_pipe( pipe_slow ); 6535 %} 6536 6537 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6538 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6539 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6540 match(Set dst (MinV a b)); 6541 match(Set dst (MaxV a b)); 6542 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6543 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6544 ins_encode %{ 6545 assert(UseAVX > 2, "required"); 6546 6547 int opcode = this->ideal_Opcode(); 6548 int vlen_enc = vector_length_encoding(this); 6549 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6550 6551 __ evminmax_fp(opcode, elem_bt, 6552 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6553 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6558 // ------------------------------ Unsigned vector Min/Max ---------------------- 6559 6560 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6561 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6562 match(Set dst (UMinV a b)); 6563 match(Set dst (UMaxV a b)); 6564 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6565 ins_encode %{ 6566 int opcode = this->ideal_Opcode(); 6567 int vlen_enc = vector_length_encoding(this); 6568 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6569 assert(is_integral_type(elem_bt), ""); 6570 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6571 %} 6572 ins_pipe( pipe_slow ); 6573 %} 6574 6575 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6576 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6577 match(Set dst (UMinV a (LoadVector b))); 6578 match(Set dst (UMaxV a (LoadVector b))); 6579 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6580 ins_encode %{ 6581 int opcode = this->ideal_Opcode(); 6582 int vlen_enc = vector_length_encoding(this); 6583 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6584 assert(is_integral_type(elem_bt), ""); 6585 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6586 %} 6587 ins_pipe( pipe_slow ); 6588 %} 6589 6590 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6591 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6592 match(Set dst (UMinV a b)); 6593 match(Set dst (UMaxV a b)); 6594 effect(TEMP xtmp1, TEMP xtmp2); 6595 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6596 ins_encode %{ 6597 int opcode = this->ideal_Opcode(); 6598 int vlen_enc = vector_length_encoding(this); 6599 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6605 match(Set dst (UMinV (Binary dst src2) mask)); 6606 match(Set dst (UMaxV (Binary dst src2) mask)); 6607 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6608 ins_encode %{ 6609 int vlen_enc = vector_length_encoding(this); 6610 BasicType bt = Matcher::vector_element_basic_type(this); 6611 int opc = this->ideal_Opcode(); 6612 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6613 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6614 %} 6615 ins_pipe( pipe_slow ); 6616 %} 6617 6618 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6619 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6620 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6621 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6622 ins_encode %{ 6623 int vlen_enc = vector_length_encoding(this); 6624 BasicType bt = Matcher::vector_element_basic_type(this); 6625 int opc = this->ideal_Opcode(); 6626 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6627 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6628 %} 6629 ins_pipe( pipe_slow ); 6630 %} 6631 6632 // --------------------------------- Signum/CopySign --------------------------- 6633 6634 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6635 match(Set dst (SignumF dst (Binary zero one))); 6636 effect(KILL cr); 6637 format %{ "signumF $dst, $dst" %} 6638 ins_encode %{ 6639 int opcode = this->ideal_Opcode(); 6640 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6646 match(Set dst (SignumD dst (Binary zero one))); 6647 effect(KILL cr); 6648 format %{ "signumD $dst, $dst" %} 6649 ins_encode %{ 6650 int opcode = this->ideal_Opcode(); 6651 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6652 %} 6653 ins_pipe( pipe_slow ); 6654 %} 6655 6656 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6657 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6658 match(Set dst (SignumVF src (Binary zero one))); 6659 match(Set dst (SignumVD src (Binary zero one))); 6660 effect(TEMP dst, TEMP xtmp1); 6661 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %} 6662 ins_encode %{ 6663 int opcode = this->ideal_Opcode(); 6664 int vec_enc = vector_length_encoding(this); 6665 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6666 $xtmp1$$XMMRegister, vec_enc); 6667 %} 6668 ins_pipe( pipe_slow ); 6669 %} 6670 6671 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6672 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6673 match(Set dst (SignumVF src (Binary zero one))); 6674 match(Set dst (SignumVD src (Binary zero one))); 6675 effect(TEMP dst, TEMP ktmp1); 6676 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6677 ins_encode %{ 6678 int opcode = this->ideal_Opcode(); 6679 int vec_enc = vector_length_encoding(this); 6680 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6681 $ktmp1$$KRegister, vec_enc); 6682 %} 6683 ins_pipe( pipe_slow ); 6684 %} 6685 6686 // --------------------------------------- 6687 // For copySign use 0xE4 as writemask for vpternlog 6688 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6689 // C (xmm2) is set to 0x7FFFFFFF 6690 // Wherever xmm2 is 0, we want to pick from B (sign) 6691 // Wherever xmm2 is 1, we want to pick from A (src) 6692 // 6693 // A B C Result 6694 // 0 0 0 0 6695 // 0 0 1 0 6696 // 0 1 0 1 6697 // 0 1 1 0 6698 // 1 0 0 0 6699 // 1 0 1 1 6700 // 1 1 0 1 6701 // 1 1 1 1 6702 // 6703 // Result going from high bit to low bit is 0x11100100 = 0xe4 6704 // --------------------------------------- 6705 6706 #ifdef _LP64 6707 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6708 match(Set dst (CopySignF dst src)); 6709 effect(TEMP tmp1, TEMP tmp2); 6710 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6711 ins_encode %{ 6712 __ movl($tmp2$$Register, 0x7FFFFFFF); 6713 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6714 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6715 %} 6716 ins_pipe( pipe_slow ); 6717 %} 6718 6719 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6720 match(Set dst (CopySignD dst (Binary src zero))); 6721 ins_cost(100); 6722 effect(TEMP tmp1, TEMP tmp2); 6723 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6724 ins_encode %{ 6725 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6726 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6727 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6728 %} 6729 ins_pipe( pipe_slow ); 6730 %} 6731 6732 #endif // _LP64 6733 6734 //----------------------------- CompressBits/ExpandBits ------------------------ 6735 6736 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6737 predicate(n->bottom_type()->isa_int()); 6738 match(Set dst (CompressBits src mask)); 6739 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6740 ins_encode %{ 6741 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6742 %} 6743 ins_pipe( pipe_slow ); 6744 %} 6745 6746 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6747 predicate(n->bottom_type()->isa_int()); 6748 match(Set dst (ExpandBits src mask)); 6749 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6750 ins_encode %{ 6751 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6752 %} 6753 ins_pipe( pipe_slow ); 6754 %} 6755 6756 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6757 predicate(n->bottom_type()->isa_int()); 6758 match(Set dst (CompressBits src (LoadI mask))); 6759 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6760 ins_encode %{ 6761 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6762 %} 6763 ins_pipe( pipe_slow ); 6764 %} 6765 6766 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6767 predicate(n->bottom_type()->isa_int()); 6768 match(Set dst (ExpandBits src (LoadI mask))); 6769 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6770 ins_encode %{ 6771 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6772 %} 6773 ins_pipe( pipe_slow ); 6774 %} 6775 6776 // --------------------------------- Sqrt -------------------------------------- 6777 6778 instruct vsqrtF_reg(vec dst, vec src) %{ 6779 match(Set dst (SqrtVF src)); 6780 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6781 ins_encode %{ 6782 assert(UseAVX > 0, "required"); 6783 int vlen_enc = vector_length_encoding(this); 6784 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6785 %} 6786 ins_pipe( pipe_slow ); 6787 %} 6788 6789 instruct vsqrtF_mem(vec dst, memory mem) %{ 6790 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6791 match(Set dst (SqrtVF (LoadVector mem))); 6792 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6793 ins_encode %{ 6794 assert(UseAVX > 0, "required"); 6795 int vlen_enc = vector_length_encoding(this); 6796 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6797 %} 6798 ins_pipe( pipe_slow ); 6799 %} 6800 6801 // Floating point vector sqrt 6802 instruct vsqrtD_reg(vec dst, vec src) %{ 6803 match(Set dst (SqrtVD src)); 6804 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6805 ins_encode %{ 6806 assert(UseAVX > 0, "required"); 6807 int vlen_enc = vector_length_encoding(this); 6808 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6809 %} 6810 ins_pipe( pipe_slow ); 6811 %} 6812 6813 instruct vsqrtD_mem(vec dst, memory mem) %{ 6814 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6815 match(Set dst (SqrtVD (LoadVector mem))); 6816 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6817 ins_encode %{ 6818 assert(UseAVX > 0, "required"); 6819 int vlen_enc = vector_length_encoding(this); 6820 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6821 %} 6822 ins_pipe( pipe_slow ); 6823 %} 6824 6825 // ------------------------------ Shift --------------------------------------- 6826 6827 // Left and right shift count vectors are the same on x86 6828 // (only lowest bits of xmm reg are used for count). 6829 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6830 match(Set dst (LShiftCntV cnt)); 6831 match(Set dst (RShiftCntV cnt)); 6832 format %{ "movdl $dst,$cnt\t! load shift count" %} 6833 ins_encode %{ 6834 __ movdl($dst$$XMMRegister, $cnt$$Register); 6835 %} 6836 ins_pipe( pipe_slow ); 6837 %} 6838 6839 // Byte vector shift 6840 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6841 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6842 match(Set dst ( LShiftVB src shift)); 6843 match(Set dst ( RShiftVB src shift)); 6844 match(Set dst (URShiftVB src shift)); 6845 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6846 format %{"vector_byte_shift $dst,$src,$shift" %} 6847 ins_encode %{ 6848 assert(UseSSE > 3, "required"); 6849 int opcode = this->ideal_Opcode(); 6850 bool sign = (opcode != Op_URShiftVB); 6851 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6852 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6853 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6854 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6855 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6856 %} 6857 ins_pipe( pipe_slow ); 6858 %} 6859 6860 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6861 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6862 UseAVX <= 1); 6863 match(Set dst ( LShiftVB src shift)); 6864 match(Set dst ( RShiftVB src shift)); 6865 match(Set dst (URShiftVB src shift)); 6866 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6867 format %{"vector_byte_shift $dst,$src,$shift" %} 6868 ins_encode %{ 6869 assert(UseSSE > 3, "required"); 6870 int opcode = this->ideal_Opcode(); 6871 bool sign = (opcode != Op_URShiftVB); 6872 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6873 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6874 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6875 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6876 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6877 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6878 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6879 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6880 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6881 %} 6882 ins_pipe( pipe_slow ); 6883 %} 6884 6885 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6886 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6887 UseAVX > 1); 6888 match(Set dst ( LShiftVB src shift)); 6889 match(Set dst ( RShiftVB src shift)); 6890 match(Set dst (URShiftVB src shift)); 6891 effect(TEMP dst, TEMP tmp); 6892 format %{"vector_byte_shift $dst,$src,$shift" %} 6893 ins_encode %{ 6894 int opcode = this->ideal_Opcode(); 6895 bool sign = (opcode != Op_URShiftVB); 6896 int vlen_enc = Assembler::AVX_256bit; 6897 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6898 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6899 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6900 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6901 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6902 %} 6903 ins_pipe( pipe_slow ); 6904 %} 6905 6906 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6907 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6908 match(Set dst ( LShiftVB src shift)); 6909 match(Set dst ( RShiftVB src shift)); 6910 match(Set dst (URShiftVB src shift)); 6911 effect(TEMP dst, TEMP tmp); 6912 format %{"vector_byte_shift $dst,$src,$shift" %} 6913 ins_encode %{ 6914 assert(UseAVX > 1, "required"); 6915 int opcode = this->ideal_Opcode(); 6916 bool sign = (opcode != Op_URShiftVB); 6917 int vlen_enc = Assembler::AVX_256bit; 6918 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6919 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6920 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6921 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6922 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6923 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6924 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6925 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6926 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6927 %} 6928 ins_pipe( pipe_slow ); 6929 %} 6930 6931 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6932 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6933 match(Set dst ( LShiftVB src shift)); 6934 match(Set dst (RShiftVB src shift)); 6935 match(Set dst (URShiftVB src shift)); 6936 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6937 format %{"vector_byte_shift $dst,$src,$shift" %} 6938 ins_encode %{ 6939 assert(UseAVX > 2, "required"); 6940 int opcode = this->ideal_Opcode(); 6941 bool sign = (opcode != Op_URShiftVB); 6942 int vlen_enc = Assembler::AVX_512bit; 6943 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6944 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6945 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6946 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6947 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6948 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6949 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6950 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6951 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6952 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6953 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6954 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 // Shorts vector logical right shift produces incorrect Java result 6960 // for negative data because java code convert short value into int with 6961 // sign extension before a shift. But char vectors are fine since chars are 6962 // unsigned values. 6963 // Shorts/Chars vector left shift 6964 instruct vshiftS(vec dst, vec src, vec shift) %{ 6965 predicate(!n->as_ShiftV()->is_var_shift()); 6966 match(Set dst ( LShiftVS src shift)); 6967 match(Set dst ( RShiftVS src shift)); 6968 match(Set dst (URShiftVS src shift)); 6969 effect(TEMP dst, USE src, USE shift); 6970 format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %} 6971 ins_encode %{ 6972 int opcode = this->ideal_Opcode(); 6973 if (UseAVX > 0) { 6974 int vlen_enc = vector_length_encoding(this); 6975 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6976 } else { 6977 int vlen = Matcher::vector_length(this); 6978 if (vlen == 2) { 6979 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6980 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6981 } else if (vlen == 4) { 6982 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6983 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6984 } else { 6985 assert (vlen == 8, "sanity"); 6986 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6987 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6988 } 6989 } 6990 %} 6991 ins_pipe( pipe_slow ); 6992 %} 6993 6994 // Integers vector left shift 6995 instruct vshiftI(vec dst, vec src, vec shift) %{ 6996 predicate(!n->as_ShiftV()->is_var_shift()); 6997 match(Set dst ( LShiftVI src shift)); 6998 match(Set dst ( RShiftVI src shift)); 6999 match(Set dst (URShiftVI src shift)); 7000 effect(TEMP dst, USE src, USE shift); 7001 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 7002 ins_encode %{ 7003 int opcode = this->ideal_Opcode(); 7004 if (UseAVX > 0) { 7005 int vlen_enc = vector_length_encoding(this); 7006 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7007 } else { 7008 int vlen = Matcher::vector_length(this); 7009 if (vlen == 2) { 7010 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7011 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7012 } else { 7013 assert(vlen == 4, "sanity"); 7014 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7015 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7016 } 7017 } 7018 %} 7019 ins_pipe( pipe_slow ); 7020 %} 7021 7022 // Integers vector left constant shift 7023 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 7024 match(Set dst (LShiftVI src (LShiftCntV shift))); 7025 match(Set dst (RShiftVI src (RShiftCntV shift))); 7026 match(Set dst (URShiftVI src (RShiftCntV shift))); 7027 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 7028 ins_encode %{ 7029 int opcode = this->ideal_Opcode(); 7030 if (UseAVX > 0) { 7031 int vector_len = vector_length_encoding(this); 7032 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7033 } else { 7034 int vlen = Matcher::vector_length(this); 7035 if (vlen == 2) { 7036 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7037 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7038 } else { 7039 assert(vlen == 4, "sanity"); 7040 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7041 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7042 } 7043 } 7044 %} 7045 ins_pipe( pipe_slow ); 7046 %} 7047 7048 // Longs vector shift 7049 instruct vshiftL(vec dst, vec src, vec shift) %{ 7050 predicate(!n->as_ShiftV()->is_var_shift()); 7051 match(Set dst ( LShiftVL src shift)); 7052 match(Set dst (URShiftVL src shift)); 7053 effect(TEMP dst, USE src, USE shift); 7054 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 7055 ins_encode %{ 7056 int opcode = this->ideal_Opcode(); 7057 if (UseAVX > 0) { 7058 int vlen_enc = vector_length_encoding(this); 7059 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7060 } else { 7061 assert(Matcher::vector_length(this) == 2, ""); 7062 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7063 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7064 } 7065 %} 7066 ins_pipe( pipe_slow ); 7067 %} 7068 7069 // Longs vector constant shift 7070 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7071 match(Set dst (LShiftVL src (LShiftCntV shift))); 7072 match(Set dst (URShiftVL src (RShiftCntV shift))); 7073 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7074 ins_encode %{ 7075 int opcode = this->ideal_Opcode(); 7076 if (UseAVX > 0) { 7077 int vector_len = vector_length_encoding(this); 7078 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7079 } else { 7080 assert(Matcher::vector_length(this) == 2, ""); 7081 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7082 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7083 } 7084 %} 7085 ins_pipe( pipe_slow ); 7086 %} 7087 7088 // -------------------ArithmeticRightShift ----------------------------------- 7089 // Long vector arithmetic right shift 7090 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7091 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7092 match(Set dst (RShiftVL src shift)); 7093 effect(TEMP dst, TEMP tmp); 7094 format %{ "vshiftq $dst,$src,$shift" %} 7095 ins_encode %{ 7096 uint vlen = Matcher::vector_length(this); 7097 if (vlen == 2) { 7098 assert(UseSSE >= 2, "required"); 7099 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7100 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7101 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7102 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7103 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7104 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7105 } else { 7106 assert(vlen == 4, "sanity"); 7107 assert(UseAVX > 1, "required"); 7108 int vlen_enc = Assembler::AVX_256bit; 7109 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7110 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7111 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7112 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7113 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7114 } 7115 %} 7116 ins_pipe( pipe_slow ); 7117 %} 7118 7119 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7120 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7121 match(Set dst (RShiftVL src shift)); 7122 format %{ "vshiftq $dst,$src,$shift" %} 7123 ins_encode %{ 7124 int vlen_enc = vector_length_encoding(this); 7125 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 // ------------------- Variable Shift ----------------------------- 7131 // Byte variable shift 7132 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7133 predicate(Matcher::vector_length(n) <= 8 && 7134 n->as_ShiftV()->is_var_shift() && 7135 !VM_Version::supports_avx512bw()); 7136 match(Set dst ( LShiftVB src shift)); 7137 match(Set dst ( RShiftVB src shift)); 7138 match(Set dst (URShiftVB src shift)); 7139 effect(TEMP dst, TEMP vtmp); 7140 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7141 ins_encode %{ 7142 assert(UseAVX >= 2, "required"); 7143 7144 int opcode = this->ideal_Opcode(); 7145 int vlen_enc = Assembler::AVX_128bit; 7146 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7147 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7148 %} 7149 ins_pipe( pipe_slow ); 7150 %} 7151 7152 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7153 predicate(Matcher::vector_length(n) == 16 && 7154 n->as_ShiftV()->is_var_shift() && 7155 !VM_Version::supports_avx512bw()); 7156 match(Set dst ( LShiftVB src shift)); 7157 match(Set dst ( RShiftVB src shift)); 7158 match(Set dst (URShiftVB src shift)); 7159 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7160 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7161 ins_encode %{ 7162 assert(UseAVX >= 2, "required"); 7163 7164 int opcode = this->ideal_Opcode(); 7165 int vlen_enc = Assembler::AVX_128bit; 7166 // Shift lower half and get word result in dst 7167 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7168 7169 // Shift upper half and get word result in vtmp1 7170 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7171 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7172 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7173 7174 // Merge and down convert the two word results to byte in dst 7175 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7176 %} 7177 ins_pipe( pipe_slow ); 7178 %} 7179 7180 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7181 predicate(Matcher::vector_length(n) == 32 && 7182 n->as_ShiftV()->is_var_shift() && 7183 !VM_Version::supports_avx512bw()); 7184 match(Set dst ( LShiftVB src shift)); 7185 match(Set dst ( RShiftVB src shift)); 7186 match(Set dst (URShiftVB src shift)); 7187 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7188 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7189 ins_encode %{ 7190 assert(UseAVX >= 2, "required"); 7191 7192 int opcode = this->ideal_Opcode(); 7193 int vlen_enc = Assembler::AVX_128bit; 7194 // Process lower 128 bits and get result in dst 7195 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7196 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7197 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7198 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7199 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7200 7201 // Process higher 128 bits and get result in vtmp3 7202 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7203 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7204 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7205 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7206 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7207 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7208 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7209 7210 // Merge the two results in dst 7211 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7212 %} 7213 ins_pipe( pipe_slow ); 7214 %} 7215 7216 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7217 predicate(Matcher::vector_length(n) <= 32 && 7218 n->as_ShiftV()->is_var_shift() && 7219 VM_Version::supports_avx512bw()); 7220 match(Set dst ( LShiftVB src shift)); 7221 match(Set dst ( RShiftVB src shift)); 7222 match(Set dst (URShiftVB src shift)); 7223 effect(TEMP dst, TEMP vtmp); 7224 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7225 ins_encode %{ 7226 assert(UseAVX > 2, "required"); 7227 7228 int opcode = this->ideal_Opcode(); 7229 int vlen_enc = vector_length_encoding(this); 7230 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7231 %} 7232 ins_pipe( pipe_slow ); 7233 %} 7234 7235 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7236 predicate(Matcher::vector_length(n) == 64 && 7237 n->as_ShiftV()->is_var_shift() && 7238 VM_Version::supports_avx512bw()); 7239 match(Set dst ( LShiftVB src shift)); 7240 match(Set dst ( RShiftVB src shift)); 7241 match(Set dst (URShiftVB src shift)); 7242 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7243 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7244 ins_encode %{ 7245 assert(UseAVX > 2, "required"); 7246 7247 int opcode = this->ideal_Opcode(); 7248 int vlen_enc = Assembler::AVX_256bit; 7249 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7250 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7251 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7252 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7253 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7254 %} 7255 ins_pipe( pipe_slow ); 7256 %} 7257 7258 // Short variable shift 7259 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7260 predicate(Matcher::vector_length(n) <= 8 && 7261 n->as_ShiftV()->is_var_shift() && 7262 !VM_Version::supports_avx512bw()); 7263 match(Set dst ( LShiftVS src shift)); 7264 match(Set dst ( RShiftVS src shift)); 7265 match(Set dst (URShiftVS src shift)); 7266 effect(TEMP dst, TEMP vtmp); 7267 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7268 ins_encode %{ 7269 assert(UseAVX >= 2, "required"); 7270 7271 int opcode = this->ideal_Opcode(); 7272 bool sign = (opcode != Op_URShiftVS); 7273 int vlen_enc = Assembler::AVX_256bit; 7274 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7275 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7276 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7277 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7278 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7279 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7280 %} 7281 ins_pipe( pipe_slow ); 7282 %} 7283 7284 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7285 predicate(Matcher::vector_length(n) == 16 && 7286 n->as_ShiftV()->is_var_shift() && 7287 !VM_Version::supports_avx512bw()); 7288 match(Set dst ( LShiftVS src shift)); 7289 match(Set dst ( RShiftVS src shift)); 7290 match(Set dst (URShiftVS src shift)); 7291 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7292 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7293 ins_encode %{ 7294 assert(UseAVX >= 2, "required"); 7295 7296 int opcode = this->ideal_Opcode(); 7297 bool sign = (opcode != Op_URShiftVS); 7298 int vlen_enc = Assembler::AVX_256bit; 7299 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7300 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7301 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7302 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7303 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7304 7305 // Shift upper half, with result in dst using vtmp1 as TEMP 7306 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7307 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7308 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7309 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7310 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7311 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7312 7313 // Merge lower and upper half result into dst 7314 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7315 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7316 %} 7317 ins_pipe( pipe_slow ); 7318 %} 7319 7320 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7321 predicate(n->as_ShiftV()->is_var_shift() && 7322 VM_Version::supports_avx512bw()); 7323 match(Set dst ( LShiftVS src shift)); 7324 match(Set dst ( RShiftVS src shift)); 7325 match(Set dst (URShiftVS src shift)); 7326 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7327 ins_encode %{ 7328 assert(UseAVX > 2, "required"); 7329 7330 int opcode = this->ideal_Opcode(); 7331 int vlen_enc = vector_length_encoding(this); 7332 if (!VM_Version::supports_avx512vl()) { 7333 vlen_enc = Assembler::AVX_512bit; 7334 } 7335 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7336 %} 7337 ins_pipe( pipe_slow ); 7338 %} 7339 7340 //Integer variable shift 7341 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7342 predicate(n->as_ShiftV()->is_var_shift()); 7343 match(Set dst ( LShiftVI src shift)); 7344 match(Set dst ( RShiftVI src shift)); 7345 match(Set dst (URShiftVI src shift)); 7346 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7347 ins_encode %{ 7348 assert(UseAVX >= 2, "required"); 7349 7350 int opcode = this->ideal_Opcode(); 7351 int vlen_enc = vector_length_encoding(this); 7352 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7353 %} 7354 ins_pipe( pipe_slow ); 7355 %} 7356 7357 //Long variable shift 7358 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7359 predicate(n->as_ShiftV()->is_var_shift()); 7360 match(Set dst ( LShiftVL src shift)); 7361 match(Set dst (URShiftVL src shift)); 7362 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7363 ins_encode %{ 7364 assert(UseAVX >= 2, "required"); 7365 7366 int opcode = this->ideal_Opcode(); 7367 int vlen_enc = vector_length_encoding(this); 7368 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7369 %} 7370 ins_pipe( pipe_slow ); 7371 %} 7372 7373 //Long variable right shift arithmetic 7374 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7375 predicate(Matcher::vector_length(n) <= 4 && 7376 n->as_ShiftV()->is_var_shift() && 7377 UseAVX == 2); 7378 match(Set dst (RShiftVL src shift)); 7379 effect(TEMP dst, TEMP vtmp); 7380 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 7381 ins_encode %{ 7382 int opcode = this->ideal_Opcode(); 7383 int vlen_enc = vector_length_encoding(this); 7384 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7385 $vtmp$$XMMRegister); 7386 %} 7387 ins_pipe( pipe_slow ); 7388 %} 7389 7390 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7391 predicate(n->as_ShiftV()->is_var_shift() && 7392 UseAVX > 2); 7393 match(Set dst (RShiftVL src shift)); 7394 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7395 ins_encode %{ 7396 int opcode = this->ideal_Opcode(); 7397 int vlen_enc = vector_length_encoding(this); 7398 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7399 %} 7400 ins_pipe( pipe_slow ); 7401 %} 7402 7403 // --------------------------------- AND -------------------------------------- 7404 7405 instruct vand(vec dst, vec src) %{ 7406 predicate(UseAVX == 0); 7407 match(Set dst (AndV dst src)); 7408 format %{ "pand $dst,$src\t! and vectors" %} 7409 ins_encode %{ 7410 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7411 %} 7412 ins_pipe( pipe_slow ); 7413 %} 7414 7415 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7416 predicate(UseAVX > 0); 7417 match(Set dst (AndV src1 src2)); 7418 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7419 ins_encode %{ 7420 int vlen_enc = vector_length_encoding(this); 7421 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7422 %} 7423 ins_pipe( pipe_slow ); 7424 %} 7425 7426 instruct vand_mem(vec dst, vec src, memory mem) %{ 7427 predicate((UseAVX > 0) && 7428 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7429 match(Set dst (AndV src (LoadVector mem))); 7430 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7431 ins_encode %{ 7432 int vlen_enc = vector_length_encoding(this); 7433 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7434 %} 7435 ins_pipe( pipe_slow ); 7436 %} 7437 7438 // --------------------------------- OR --------------------------------------- 7439 7440 instruct vor(vec dst, vec src) %{ 7441 predicate(UseAVX == 0); 7442 match(Set dst (OrV dst src)); 7443 format %{ "por $dst,$src\t! or vectors" %} 7444 ins_encode %{ 7445 __ por($dst$$XMMRegister, $src$$XMMRegister); 7446 %} 7447 ins_pipe( pipe_slow ); 7448 %} 7449 7450 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7451 predicate(UseAVX > 0); 7452 match(Set dst (OrV src1 src2)); 7453 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7454 ins_encode %{ 7455 int vlen_enc = vector_length_encoding(this); 7456 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7457 %} 7458 ins_pipe( pipe_slow ); 7459 %} 7460 7461 instruct vor_mem(vec dst, vec src, memory mem) %{ 7462 predicate((UseAVX > 0) && 7463 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7464 match(Set dst (OrV src (LoadVector mem))); 7465 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7466 ins_encode %{ 7467 int vlen_enc = vector_length_encoding(this); 7468 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7469 %} 7470 ins_pipe( pipe_slow ); 7471 %} 7472 7473 // --------------------------------- XOR -------------------------------------- 7474 7475 instruct vxor(vec dst, vec src) %{ 7476 predicate(UseAVX == 0); 7477 match(Set dst (XorV dst src)); 7478 format %{ "pxor $dst,$src\t! xor vectors" %} 7479 ins_encode %{ 7480 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7481 %} 7482 ins_pipe( pipe_slow ); 7483 %} 7484 7485 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7486 predicate(UseAVX > 0); 7487 match(Set dst (XorV src1 src2)); 7488 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7489 ins_encode %{ 7490 int vlen_enc = vector_length_encoding(this); 7491 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7492 %} 7493 ins_pipe( pipe_slow ); 7494 %} 7495 7496 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7497 predicate((UseAVX > 0) && 7498 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7499 match(Set dst (XorV src (LoadVector mem))); 7500 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7501 ins_encode %{ 7502 int vlen_enc = vector_length_encoding(this); 7503 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7504 %} 7505 ins_pipe( pipe_slow ); 7506 %} 7507 7508 // --------------------------------- VectorCast -------------------------------------- 7509 7510 instruct vcastBtoX(vec dst, vec src) %{ 7511 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7512 match(Set dst (VectorCastB2X src)); 7513 format %{ "vector_cast_b2x $dst,$src\t!" %} 7514 ins_encode %{ 7515 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7516 int vlen_enc = vector_length_encoding(this); 7517 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7518 %} 7519 ins_pipe( pipe_slow ); 7520 %} 7521 7522 instruct vcastBtoD(legVec dst, legVec src) %{ 7523 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7524 match(Set dst (VectorCastB2X src)); 7525 format %{ "vector_cast_b2x $dst,$src\t!" %} 7526 ins_encode %{ 7527 int vlen_enc = vector_length_encoding(this); 7528 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7529 %} 7530 ins_pipe( pipe_slow ); 7531 %} 7532 7533 instruct castStoX(vec dst, vec src) %{ 7534 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7535 Matcher::vector_length(n->in(1)) <= 8 && // src 7536 Matcher::vector_element_basic_type(n) == T_BYTE); 7537 match(Set dst (VectorCastS2X src)); 7538 format %{ "vector_cast_s2x $dst,$src" %} 7539 ins_encode %{ 7540 assert(UseAVX > 0, "required"); 7541 7542 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7543 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7544 %} 7545 ins_pipe( pipe_slow ); 7546 %} 7547 7548 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7549 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7550 Matcher::vector_length(n->in(1)) == 16 && // src 7551 Matcher::vector_element_basic_type(n) == T_BYTE); 7552 effect(TEMP dst, TEMP vtmp); 7553 match(Set dst (VectorCastS2X src)); 7554 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} 7555 ins_encode %{ 7556 assert(UseAVX > 0, "required"); 7557 7558 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7559 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7560 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7561 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7562 %} 7563 ins_pipe( pipe_slow ); 7564 %} 7565 7566 instruct vcastStoX_evex(vec dst, vec src) %{ 7567 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7568 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7569 match(Set dst (VectorCastS2X src)); 7570 format %{ "vector_cast_s2x $dst,$src\t!" %} 7571 ins_encode %{ 7572 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7573 int src_vlen_enc = vector_length_encoding(this, $src); 7574 int vlen_enc = vector_length_encoding(this); 7575 switch (to_elem_bt) { 7576 case T_BYTE: 7577 if (!VM_Version::supports_avx512vl()) { 7578 vlen_enc = Assembler::AVX_512bit; 7579 } 7580 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7581 break; 7582 case T_INT: 7583 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7584 break; 7585 case T_FLOAT: 7586 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7587 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7588 break; 7589 case T_LONG: 7590 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7591 break; 7592 case T_DOUBLE: { 7593 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7594 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7595 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7596 break; 7597 } 7598 default: 7599 ShouldNotReachHere(); 7600 } 7601 %} 7602 ins_pipe( pipe_slow ); 7603 %} 7604 7605 instruct castItoX(vec dst, vec src) %{ 7606 predicate(UseAVX <= 2 && 7607 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7608 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7609 match(Set dst (VectorCastI2X src)); 7610 format %{ "vector_cast_i2x $dst,$src" %} 7611 ins_encode %{ 7612 assert(UseAVX > 0, "required"); 7613 7614 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7615 int vlen_enc = vector_length_encoding(this, $src); 7616 7617 if (to_elem_bt == T_BYTE) { 7618 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7619 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7620 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7621 } else { 7622 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7623 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7624 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7625 } 7626 %} 7627 ins_pipe( pipe_slow ); 7628 %} 7629 7630 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7631 predicate(UseAVX <= 2 && 7632 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7633 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7634 match(Set dst (VectorCastI2X src)); 7635 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} 7636 effect(TEMP dst, TEMP vtmp); 7637 ins_encode %{ 7638 assert(UseAVX > 0, "required"); 7639 7640 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7641 int vlen_enc = vector_length_encoding(this, $src); 7642 7643 if (to_elem_bt == T_BYTE) { 7644 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7645 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7646 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7647 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7648 } else { 7649 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7650 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7651 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7652 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7653 } 7654 %} 7655 ins_pipe( pipe_slow ); 7656 %} 7657 7658 instruct vcastItoX_evex(vec dst, vec src) %{ 7659 predicate(UseAVX > 2 || 7660 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7661 match(Set dst (VectorCastI2X src)); 7662 format %{ "vector_cast_i2x $dst,$src\t!" %} 7663 ins_encode %{ 7664 assert(UseAVX > 0, "required"); 7665 7666 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7667 int src_vlen_enc = vector_length_encoding(this, $src); 7668 int dst_vlen_enc = vector_length_encoding(this); 7669 switch (dst_elem_bt) { 7670 case T_BYTE: 7671 if (!VM_Version::supports_avx512vl()) { 7672 src_vlen_enc = Assembler::AVX_512bit; 7673 } 7674 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7675 break; 7676 case T_SHORT: 7677 if (!VM_Version::supports_avx512vl()) { 7678 src_vlen_enc = Assembler::AVX_512bit; 7679 } 7680 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7681 break; 7682 case T_FLOAT: 7683 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7684 break; 7685 case T_LONG: 7686 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7687 break; 7688 case T_DOUBLE: 7689 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7690 break; 7691 default: 7692 ShouldNotReachHere(); 7693 } 7694 %} 7695 ins_pipe( pipe_slow ); 7696 %} 7697 7698 instruct vcastLtoBS(vec dst, vec src) %{ 7699 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7700 UseAVX <= 2); 7701 match(Set dst (VectorCastL2X src)); 7702 format %{ "vector_cast_l2x $dst,$src" %} 7703 ins_encode %{ 7704 assert(UseAVX > 0, "required"); 7705 7706 int vlen = Matcher::vector_length_in_bytes(this, $src); 7707 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7708 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7709 : ExternalAddress(vector_int_to_short_mask()); 7710 if (vlen <= 16) { 7711 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7712 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7713 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7714 } else { 7715 assert(vlen <= 32, "required"); 7716 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7717 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7718 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7719 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7720 } 7721 if (to_elem_bt == T_BYTE) { 7722 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7723 } 7724 %} 7725 ins_pipe( pipe_slow ); 7726 %} 7727 7728 instruct vcastLtoX_evex(vec dst, vec src) %{ 7729 predicate(UseAVX > 2 || 7730 (Matcher::vector_element_basic_type(n) == T_INT || 7731 Matcher::vector_element_basic_type(n) == T_FLOAT || 7732 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7733 match(Set dst (VectorCastL2X src)); 7734 format %{ "vector_cast_l2x $dst,$src\t!" %} 7735 ins_encode %{ 7736 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7737 int vlen = Matcher::vector_length_in_bytes(this, $src); 7738 int vlen_enc = vector_length_encoding(this, $src); 7739 switch (to_elem_bt) { 7740 case T_BYTE: 7741 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7742 vlen_enc = Assembler::AVX_512bit; 7743 } 7744 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7745 break; 7746 case T_SHORT: 7747 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7748 vlen_enc = Assembler::AVX_512bit; 7749 } 7750 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7751 break; 7752 case T_INT: 7753 if (vlen == 8) { 7754 if ($dst$$XMMRegister != $src$$XMMRegister) { 7755 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7756 } 7757 } else if (vlen == 16) { 7758 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7759 } else if (vlen == 32) { 7760 if (UseAVX > 2) { 7761 if (!VM_Version::supports_avx512vl()) { 7762 vlen_enc = Assembler::AVX_512bit; 7763 } 7764 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7765 } else { 7766 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7767 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7768 } 7769 } else { // vlen == 64 7770 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7771 } 7772 break; 7773 case T_FLOAT: 7774 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7775 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7776 break; 7777 case T_DOUBLE: 7778 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7779 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7780 break; 7781 7782 default: assert(false, "%s", type2name(to_elem_bt)); 7783 } 7784 %} 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 instruct vcastFtoD_reg(vec dst, vec src) %{ 7789 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7790 match(Set dst (VectorCastF2X src)); 7791 format %{ "vector_cast_f2d $dst,$src\t!" %} 7792 ins_encode %{ 7793 int vlen_enc = vector_length_encoding(this); 7794 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7795 %} 7796 ins_pipe( pipe_slow ); 7797 %} 7798 7799 7800 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7801 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7802 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7803 match(Set dst (VectorCastF2X src)); 7804 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7805 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7806 ins_encode %{ 7807 int vlen_enc = vector_length_encoding(this, $src); 7808 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7809 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7810 // 32 bit addresses for register indirect addressing mode since stub constants 7811 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7812 // However, targets are free to increase this limit, but having a large code cache size 7813 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7814 // cap we save a temporary register allocation which in limiting case can prevent 7815 // spilling in high register pressure blocks. 7816 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7817 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7818 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7819 %} 7820 ins_pipe( pipe_slow ); 7821 %} 7822 7823 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7824 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7825 is_integral_type(Matcher::vector_element_basic_type(n))); 7826 match(Set dst (VectorCastF2X src)); 7827 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7828 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7829 ins_encode %{ 7830 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7831 if (to_elem_bt == T_LONG) { 7832 int vlen_enc = vector_length_encoding(this); 7833 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7834 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7835 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7836 } else { 7837 int vlen_enc = vector_length_encoding(this, $src); 7838 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7839 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7840 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7841 } 7842 %} 7843 ins_pipe( pipe_slow ); 7844 %} 7845 7846 instruct vcastDtoF_reg(vec dst, vec src) %{ 7847 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7848 match(Set dst (VectorCastD2X src)); 7849 format %{ "vector_cast_d2x $dst,$src\t!" %} 7850 ins_encode %{ 7851 int vlen_enc = vector_length_encoding(this, $src); 7852 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7853 %} 7854 ins_pipe( pipe_slow ); 7855 %} 7856 7857 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7858 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7859 is_integral_type(Matcher::vector_element_basic_type(n))); 7860 match(Set dst (VectorCastD2X src)); 7861 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7862 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7863 ins_encode %{ 7864 int vlen_enc = vector_length_encoding(this, $src); 7865 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7866 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7867 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7868 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7869 %} 7870 ins_pipe( pipe_slow ); 7871 %} 7872 7873 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7874 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7875 is_integral_type(Matcher::vector_element_basic_type(n))); 7876 match(Set dst (VectorCastD2X src)); 7877 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7878 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7879 ins_encode %{ 7880 int vlen_enc = vector_length_encoding(this, $src); 7881 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7882 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7883 ExternalAddress(vector_float_signflip()); 7884 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7885 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7886 %} 7887 ins_pipe( pipe_slow ); 7888 %} 7889 7890 instruct vucast(vec dst, vec src) %{ 7891 match(Set dst (VectorUCastB2X src)); 7892 match(Set dst (VectorUCastS2X src)); 7893 match(Set dst (VectorUCastI2X src)); 7894 format %{ "vector_ucast $dst,$src\t!" %} 7895 ins_encode %{ 7896 assert(UseAVX > 0, "required"); 7897 7898 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7899 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7900 int vlen_enc = vector_length_encoding(this); 7901 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7902 %} 7903 ins_pipe( pipe_slow ); 7904 %} 7905 7906 #ifdef _LP64 7907 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7908 predicate(!VM_Version::supports_avx512vl() && 7909 Matcher::vector_length_in_bytes(n) < 64 && 7910 Matcher::vector_element_basic_type(n) == T_INT); 7911 match(Set dst (RoundVF src)); 7912 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7913 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7914 ins_encode %{ 7915 int vlen_enc = vector_length_encoding(this); 7916 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7917 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7918 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7919 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7920 %} 7921 ins_pipe( pipe_slow ); 7922 %} 7923 7924 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7925 predicate((VM_Version::supports_avx512vl() || 7926 Matcher::vector_length_in_bytes(n) == 64) && 7927 Matcher::vector_element_basic_type(n) == T_INT); 7928 match(Set dst (RoundVF src)); 7929 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7930 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7931 ins_encode %{ 7932 int vlen_enc = vector_length_encoding(this); 7933 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7934 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7935 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7936 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7937 %} 7938 ins_pipe( pipe_slow ); 7939 %} 7940 7941 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7942 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7943 match(Set dst (RoundVD src)); 7944 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7945 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7946 ins_encode %{ 7947 int vlen_enc = vector_length_encoding(this); 7948 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7949 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7950 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7951 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7952 %} 7953 ins_pipe( pipe_slow ); 7954 %} 7955 7956 #endif // _LP64 7957 7958 // --------------------------------- VectorMaskCmp -------------------------------------- 7959 7960 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7961 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7962 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7963 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7964 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7965 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7966 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7967 ins_encode %{ 7968 int vlen_enc = vector_length_encoding(this, $src1); 7969 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7970 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7971 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7972 } else { 7973 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7974 } 7975 %} 7976 ins_pipe( pipe_slow ); 7977 %} 7978 7979 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7980 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7981 n->bottom_type()->isa_vectmask() == nullptr && 7982 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7983 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7984 effect(TEMP ktmp); 7985 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7986 ins_encode %{ 7987 int vlen_enc = Assembler::AVX_512bit; 7988 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7989 KRegister mask = k0; // The comparison itself is not being masked. 7990 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7991 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7992 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7993 } else { 7994 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7995 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7996 } 7997 %} 7998 ins_pipe( pipe_slow ); 7999 %} 8000 8001 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 8002 predicate(n->bottom_type()->isa_vectmask() && 8003 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8004 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8005 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 8006 ins_encode %{ 8007 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8008 int vlen_enc = vector_length_encoding(this, $src1); 8009 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8010 KRegister mask = k0; // The comparison itself is not being masked. 8011 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8012 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8013 } else { 8014 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8015 } 8016 %} 8017 ins_pipe( pipe_slow ); 8018 %} 8019 8020 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8021 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8022 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8023 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8024 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8025 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8026 (n->in(2)->get_int() == BoolTest::eq || 8027 n->in(2)->get_int() == BoolTest::lt || 8028 n->in(2)->get_int() == BoolTest::gt)); // cond 8029 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8030 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 8031 ins_encode %{ 8032 int vlen_enc = vector_length_encoding(this, $src1); 8033 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8034 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8035 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 8036 %} 8037 ins_pipe( pipe_slow ); 8038 %} 8039 8040 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8041 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8042 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8043 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8044 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8045 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8046 (n->in(2)->get_int() == BoolTest::ne || 8047 n->in(2)->get_int() == BoolTest::le || 8048 n->in(2)->get_int() == BoolTest::ge)); // cond 8049 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8050 effect(TEMP dst, TEMP xtmp); 8051 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 8052 ins_encode %{ 8053 int vlen_enc = vector_length_encoding(this, $src1); 8054 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8055 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8056 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8057 %} 8058 ins_pipe( pipe_slow ); 8059 %} 8060 8061 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8062 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8063 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8064 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8065 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8066 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8067 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8068 effect(TEMP dst, TEMP xtmp); 8069 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 8070 ins_encode %{ 8071 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 8072 int vlen_enc = vector_length_encoding(this, $src1); 8073 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8074 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8075 8076 if (vlen_enc == Assembler::AVX_128bit) { 8077 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8078 } else { 8079 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8080 } 8081 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8082 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8083 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8084 %} 8085 ins_pipe( pipe_slow ); 8086 %} 8087 8088 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8089 predicate((n->bottom_type()->isa_vectmask() == nullptr && 8090 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 8091 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8092 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8093 effect(TEMP ktmp); 8094 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8095 ins_encode %{ 8096 assert(UseAVX > 2, "required"); 8097 8098 int vlen_enc = vector_length_encoding(this, $src1); 8099 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8100 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8101 KRegister mask = k0; // The comparison itself is not being masked. 8102 bool merge = false; 8103 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8104 8105 switch (src1_elem_bt) { 8106 case T_INT: { 8107 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8108 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8109 break; 8110 } 8111 case T_LONG: { 8112 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8113 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8114 break; 8115 } 8116 default: assert(false, "%s", type2name(src1_elem_bt)); 8117 } 8118 %} 8119 ins_pipe( pipe_slow ); 8120 %} 8121 8122 8123 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 8124 predicate(n->bottom_type()->isa_vectmask() && 8125 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8126 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8127 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %} 8128 ins_encode %{ 8129 assert(UseAVX > 2, "required"); 8130 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8131 8132 int vlen_enc = vector_length_encoding(this, $src1); 8133 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8134 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8135 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8136 8137 // Comparison i 8138 switch (src1_elem_bt) { 8139 case T_BYTE: { 8140 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8141 break; 8142 } 8143 case T_SHORT: { 8144 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8145 break; 8146 } 8147 case T_INT: { 8148 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8149 break; 8150 } 8151 case T_LONG: { 8152 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8153 break; 8154 } 8155 default: assert(false, "%s", type2name(src1_elem_bt)); 8156 } 8157 %} 8158 ins_pipe( pipe_slow ); 8159 %} 8160 8161 // Extract 8162 8163 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8164 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8165 match(Set dst (ExtractI src idx)); 8166 match(Set dst (ExtractS src idx)); 8167 #ifdef _LP64 8168 match(Set dst (ExtractB src idx)); 8169 #endif 8170 format %{ "extractI $dst,$src,$idx\t!" %} 8171 ins_encode %{ 8172 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8173 8174 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8175 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8176 %} 8177 ins_pipe( pipe_slow ); 8178 %} 8179 8180 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8181 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8182 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8183 match(Set dst (ExtractI src idx)); 8184 match(Set dst (ExtractS src idx)); 8185 #ifdef _LP64 8186 match(Set dst (ExtractB src idx)); 8187 #endif 8188 effect(TEMP vtmp); 8189 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8190 ins_encode %{ 8191 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8192 8193 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8194 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8195 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8196 %} 8197 ins_pipe( pipe_slow ); 8198 %} 8199 8200 #ifdef _LP64 8201 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8202 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8203 match(Set dst (ExtractL src idx)); 8204 format %{ "extractL $dst,$src,$idx\t!" %} 8205 ins_encode %{ 8206 assert(UseSSE >= 4, "required"); 8207 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8208 8209 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8215 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8216 Matcher::vector_length(n->in(1)) == 8); // src 8217 match(Set dst (ExtractL src idx)); 8218 effect(TEMP vtmp); 8219 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} 8220 ins_encode %{ 8221 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8222 8223 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8224 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8225 %} 8226 ins_pipe( pipe_slow ); 8227 %} 8228 #endif 8229 8230 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8231 predicate(Matcher::vector_length(n->in(1)) <= 4); 8232 match(Set dst (ExtractF src idx)); 8233 effect(TEMP dst, TEMP vtmp); 8234 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8235 ins_encode %{ 8236 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8237 8238 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8239 %} 8240 ins_pipe( pipe_slow ); 8241 %} 8242 8243 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8244 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8245 Matcher::vector_length(n->in(1)/*src*/) == 16); 8246 match(Set dst (ExtractF src idx)); 8247 effect(TEMP vtmp); 8248 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8249 ins_encode %{ 8250 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8251 8252 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8253 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8254 %} 8255 ins_pipe( pipe_slow ); 8256 %} 8257 8258 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8259 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8260 match(Set dst (ExtractD src idx)); 8261 format %{ "extractD $dst,$src,$idx\t!" %} 8262 ins_encode %{ 8263 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8264 8265 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8266 %} 8267 ins_pipe( pipe_slow ); 8268 %} 8269 8270 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8271 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8272 Matcher::vector_length(n->in(1)) == 8); // src 8273 match(Set dst (ExtractD src idx)); 8274 effect(TEMP vtmp); 8275 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8276 ins_encode %{ 8277 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8278 8279 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8280 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8281 %} 8282 ins_pipe( pipe_slow ); 8283 %} 8284 8285 // --------------------------------- Vector Blend -------------------------------------- 8286 8287 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8288 predicate(UseAVX == 0); 8289 match(Set dst (VectorBlend (Binary dst src) mask)); 8290 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 8291 effect(TEMP tmp); 8292 ins_encode %{ 8293 assert(UseSSE >= 4, "required"); 8294 8295 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8296 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8297 } 8298 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8299 %} 8300 ins_pipe( pipe_slow ); 8301 %} 8302 8303 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8304 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8305 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8306 Matcher::vector_length_in_bytes(n) <= 32 && 8307 is_integral_type(Matcher::vector_element_basic_type(n))); 8308 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8309 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8310 ins_encode %{ 8311 int vlen_enc = vector_length_encoding(this); 8312 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8313 %} 8314 ins_pipe( pipe_slow ); 8315 %} 8316 8317 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8318 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8319 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8320 Matcher::vector_length_in_bytes(n) <= 32 && 8321 !is_integral_type(Matcher::vector_element_basic_type(n))); 8322 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8323 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8324 ins_encode %{ 8325 int vlen_enc = vector_length_encoding(this); 8326 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8327 %} 8328 ins_pipe( pipe_slow ); 8329 %} 8330 8331 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8332 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8333 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8334 Matcher::vector_length_in_bytes(n) <= 32); 8335 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8336 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8337 effect(TEMP vtmp, TEMP dst); 8338 ins_encode %{ 8339 int vlen_enc = vector_length_encoding(this); 8340 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8341 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8342 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8343 %} 8344 ins_pipe( pipe_slow ); 8345 %} 8346 8347 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8348 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8349 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8350 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8351 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8352 effect(TEMP ktmp); 8353 ins_encode %{ 8354 int vlen_enc = Assembler::AVX_512bit; 8355 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8356 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8357 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8358 %} 8359 ins_pipe( pipe_slow ); 8360 %} 8361 8362 8363 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8364 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8365 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8366 VM_Version::supports_avx512bw())); 8367 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8368 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8369 ins_encode %{ 8370 int vlen_enc = vector_length_encoding(this); 8371 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8372 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8373 %} 8374 ins_pipe( pipe_slow ); 8375 %} 8376 8377 // --------------------------------- ABS -------------------------------------- 8378 // a = |a| 8379 instruct vabsB_reg(vec dst, vec src) %{ 8380 match(Set dst (AbsVB src)); 8381 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8382 ins_encode %{ 8383 uint vlen = Matcher::vector_length(this); 8384 if (vlen <= 16) { 8385 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8386 } else { 8387 int vlen_enc = vector_length_encoding(this); 8388 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8389 } 8390 %} 8391 ins_pipe( pipe_slow ); 8392 %} 8393 8394 instruct vabsS_reg(vec dst, vec src) %{ 8395 match(Set dst (AbsVS src)); 8396 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8397 ins_encode %{ 8398 uint vlen = Matcher::vector_length(this); 8399 if (vlen <= 8) { 8400 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8401 } else { 8402 int vlen_enc = vector_length_encoding(this); 8403 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8404 } 8405 %} 8406 ins_pipe( pipe_slow ); 8407 %} 8408 8409 instruct vabsI_reg(vec dst, vec src) %{ 8410 match(Set dst (AbsVI src)); 8411 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8412 ins_encode %{ 8413 uint vlen = Matcher::vector_length(this); 8414 if (vlen <= 4) { 8415 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8416 } else { 8417 int vlen_enc = vector_length_encoding(this); 8418 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8419 } 8420 %} 8421 ins_pipe( pipe_slow ); 8422 %} 8423 8424 instruct vabsL_reg(vec dst, vec src) %{ 8425 match(Set dst (AbsVL src)); 8426 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8427 ins_encode %{ 8428 assert(UseAVX > 2, "required"); 8429 int vlen_enc = vector_length_encoding(this); 8430 if (!VM_Version::supports_avx512vl()) { 8431 vlen_enc = Assembler::AVX_512bit; 8432 } 8433 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8434 %} 8435 ins_pipe( pipe_slow ); 8436 %} 8437 8438 // --------------------------------- ABSNEG -------------------------------------- 8439 8440 instruct vabsnegF(vec dst, vec src) %{ 8441 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8442 match(Set dst (AbsVF src)); 8443 match(Set dst (NegVF src)); 8444 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8445 ins_cost(150); 8446 ins_encode %{ 8447 int opcode = this->ideal_Opcode(); 8448 int vlen = Matcher::vector_length(this); 8449 if (vlen == 2) { 8450 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8451 } else { 8452 assert(vlen == 8 || vlen == 16, "required"); 8453 int vlen_enc = vector_length_encoding(this); 8454 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8455 } 8456 %} 8457 ins_pipe( pipe_slow ); 8458 %} 8459 8460 instruct vabsneg4F(vec dst) %{ 8461 predicate(Matcher::vector_length(n) == 4); 8462 match(Set dst (AbsVF dst)); 8463 match(Set dst (NegVF dst)); 8464 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8465 ins_cost(150); 8466 ins_encode %{ 8467 int opcode = this->ideal_Opcode(); 8468 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8469 %} 8470 ins_pipe( pipe_slow ); 8471 %} 8472 8473 instruct vabsnegD(vec dst, vec src) %{ 8474 match(Set dst (AbsVD src)); 8475 match(Set dst (NegVD src)); 8476 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8477 ins_encode %{ 8478 int opcode = this->ideal_Opcode(); 8479 uint vlen = Matcher::vector_length(this); 8480 if (vlen == 2) { 8481 assert(UseSSE >= 2, "required"); 8482 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8483 } else { 8484 int vlen_enc = vector_length_encoding(this); 8485 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8486 } 8487 %} 8488 ins_pipe( pipe_slow ); 8489 %} 8490 8491 //------------------------------------- VectorTest -------------------------------------------- 8492 8493 #ifdef _LP64 8494 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8495 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8496 match(Set cr (VectorTest src1 src2)); 8497 effect(TEMP vtmp); 8498 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8499 ins_encode %{ 8500 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8501 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8502 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8503 %} 8504 ins_pipe( pipe_slow ); 8505 %} 8506 8507 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8508 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8509 match(Set cr (VectorTest src1 src2)); 8510 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8511 ins_encode %{ 8512 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8513 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8514 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8515 %} 8516 ins_pipe( pipe_slow ); 8517 %} 8518 8519 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8520 predicate((Matcher::vector_length(n->in(1)) < 8 || 8521 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8522 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8523 match(Set cr (VectorTest src1 src2)); 8524 effect(TEMP tmp); 8525 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8526 ins_encode %{ 8527 uint masklen = Matcher::vector_length(this, $src1); 8528 __ kmovwl($tmp$$Register, $src1$$KRegister); 8529 __ andl($tmp$$Register, (1 << masklen) - 1); 8530 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8531 %} 8532 ins_pipe( pipe_slow ); 8533 %} 8534 8535 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8536 predicate((Matcher::vector_length(n->in(1)) < 8 || 8537 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8538 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8539 match(Set cr (VectorTest src1 src2)); 8540 effect(TEMP tmp); 8541 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8542 ins_encode %{ 8543 uint masklen = Matcher::vector_length(this, $src1); 8544 __ kmovwl($tmp$$Register, $src1$$KRegister); 8545 __ andl($tmp$$Register, (1 << masklen) - 1); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8551 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8552 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8553 match(Set cr (VectorTest src1 src2)); 8554 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8555 ins_encode %{ 8556 uint masklen = Matcher::vector_length(this, $src1); 8557 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561 #endif 8562 8563 //------------------------------------- LoadMask -------------------------------------------- 8564 8565 instruct loadMask(legVec dst, legVec src) %{ 8566 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8567 match(Set dst (VectorLoadMask src)); 8568 effect(TEMP dst); 8569 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8570 ins_encode %{ 8571 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8572 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8573 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8574 %} 8575 ins_pipe( pipe_slow ); 8576 %} 8577 8578 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8579 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8580 match(Set dst (VectorLoadMask src)); 8581 effect(TEMP xtmp); 8582 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8583 ins_encode %{ 8584 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8585 true, Assembler::AVX_512bit); 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8591 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8592 match(Set dst (VectorLoadMask src)); 8593 effect(TEMP xtmp); 8594 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8595 ins_encode %{ 8596 int vlen_enc = vector_length_encoding(in(1)); 8597 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8598 false, vlen_enc); 8599 %} 8600 ins_pipe( pipe_slow ); 8601 %} 8602 8603 //------------------------------------- StoreMask -------------------------------------------- 8604 8605 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8606 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8607 match(Set dst (VectorStoreMask src size)); 8608 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8609 ins_encode %{ 8610 int vlen = Matcher::vector_length(this); 8611 if (vlen <= 16 && UseAVX <= 2) { 8612 assert(UseSSE >= 3, "required"); 8613 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8614 } else { 8615 assert(UseAVX > 0, "required"); 8616 int src_vlen_enc = vector_length_encoding(this, $src); 8617 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8618 } 8619 %} 8620 ins_pipe( pipe_slow ); 8621 %} 8622 8623 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8624 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8625 match(Set dst (VectorStoreMask src size)); 8626 effect(TEMP_DEF dst, TEMP xtmp); 8627 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8628 ins_encode %{ 8629 int vlen_enc = Assembler::AVX_128bit; 8630 int vlen = Matcher::vector_length(this); 8631 if (vlen <= 8) { 8632 assert(UseSSE >= 3, "required"); 8633 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8634 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8635 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8636 } else { 8637 assert(UseAVX > 0, "required"); 8638 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8639 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8640 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8641 } 8642 %} 8643 ins_pipe( pipe_slow ); 8644 %} 8645 8646 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8647 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8648 match(Set dst (VectorStoreMask src size)); 8649 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8650 effect(TEMP_DEF dst, TEMP xtmp); 8651 ins_encode %{ 8652 int vlen_enc = Assembler::AVX_128bit; 8653 int vlen = Matcher::vector_length(this); 8654 if (vlen <= 4) { 8655 assert(UseSSE >= 3, "required"); 8656 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8657 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8658 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8659 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8660 } else { 8661 assert(UseAVX > 0, "required"); 8662 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8663 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8664 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8665 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8666 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8667 } 8668 %} 8669 ins_pipe( pipe_slow ); 8670 %} 8671 8672 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8673 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8674 match(Set dst (VectorStoreMask src size)); 8675 effect(TEMP_DEF dst, TEMP xtmp); 8676 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8677 ins_encode %{ 8678 assert(UseSSE >= 3, "required"); 8679 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8680 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8681 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8682 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8683 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8684 %} 8685 ins_pipe( pipe_slow ); 8686 %} 8687 8688 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8689 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8690 match(Set dst (VectorStoreMask src size)); 8691 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8692 effect(TEMP_DEF dst, TEMP vtmp); 8693 ins_encode %{ 8694 int vlen_enc = Assembler::AVX_128bit; 8695 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8696 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8697 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8698 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8699 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8700 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8701 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8702 %} 8703 ins_pipe( pipe_slow ); 8704 %} 8705 8706 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8707 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8708 match(Set dst (VectorStoreMask src size)); 8709 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8710 ins_encode %{ 8711 int src_vlen_enc = vector_length_encoding(this, $src); 8712 int dst_vlen_enc = vector_length_encoding(this); 8713 if (!VM_Version::supports_avx512vl()) { 8714 src_vlen_enc = Assembler::AVX_512bit; 8715 } 8716 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8717 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8718 %} 8719 ins_pipe( pipe_slow ); 8720 %} 8721 8722 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8723 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8724 match(Set dst (VectorStoreMask src size)); 8725 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8726 ins_encode %{ 8727 int src_vlen_enc = vector_length_encoding(this, $src); 8728 int dst_vlen_enc = vector_length_encoding(this); 8729 if (!VM_Version::supports_avx512vl()) { 8730 src_vlen_enc = Assembler::AVX_512bit; 8731 } 8732 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8733 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8734 %} 8735 ins_pipe( pipe_slow ); 8736 %} 8737 8738 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8739 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8740 match(Set dst (VectorStoreMask mask size)); 8741 effect(TEMP_DEF dst); 8742 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8743 ins_encode %{ 8744 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8745 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8746 false, Assembler::AVX_512bit, noreg); 8747 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8748 %} 8749 ins_pipe( pipe_slow ); 8750 %} 8751 8752 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8753 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8754 match(Set dst (VectorStoreMask mask size)); 8755 effect(TEMP_DEF dst); 8756 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8757 ins_encode %{ 8758 int dst_vlen_enc = vector_length_encoding(this); 8759 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8760 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8761 %} 8762 ins_pipe( pipe_slow ); 8763 %} 8764 8765 instruct vmaskcast_evex(kReg dst) %{ 8766 match(Set dst (VectorMaskCast dst)); 8767 ins_cost(0); 8768 format %{ "vector_mask_cast $dst" %} 8769 ins_encode %{ 8770 // empty 8771 %} 8772 ins_pipe(empty); 8773 %} 8774 8775 instruct vmaskcast(vec dst) %{ 8776 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8777 match(Set dst (VectorMaskCast dst)); 8778 ins_cost(0); 8779 format %{ "vector_mask_cast $dst" %} 8780 ins_encode %{ 8781 // empty 8782 %} 8783 ins_pipe(empty); 8784 %} 8785 8786 instruct vmaskcast_avx(vec dst, vec src) %{ 8787 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8788 match(Set dst (VectorMaskCast src)); 8789 format %{ "vector_mask_cast $dst, $src" %} 8790 ins_encode %{ 8791 int vlen = Matcher::vector_length(this); 8792 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8793 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8794 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8795 %} 8796 ins_pipe(pipe_slow); 8797 %} 8798 8799 //-------------------------------- Load Iota Indices ---------------------------------- 8800 8801 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8802 match(Set dst (VectorLoadConst src)); 8803 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8804 ins_encode %{ 8805 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8806 BasicType bt = Matcher::vector_element_basic_type(this); 8807 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8808 %} 8809 ins_pipe( pipe_slow ); 8810 %} 8811 8812 #ifdef _LP64 8813 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8814 match(Set dst (PopulateIndex src1 src2)); 8815 effect(TEMP dst, TEMP vtmp); 8816 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8817 ins_encode %{ 8818 assert($src2$$constant == 1, "required"); 8819 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8820 int vlen_enc = vector_length_encoding(this); 8821 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8822 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8823 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8824 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8825 %} 8826 ins_pipe( pipe_slow ); 8827 %} 8828 8829 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8830 match(Set dst (PopulateIndex src1 src2)); 8831 effect(TEMP dst, TEMP vtmp); 8832 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8833 ins_encode %{ 8834 assert($src2$$constant == 1, "required"); 8835 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8836 int vlen_enc = vector_length_encoding(this); 8837 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8838 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8839 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8840 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8841 %} 8842 ins_pipe( pipe_slow ); 8843 %} 8844 #endif 8845 //-------------------------------- Rearrange ---------------------------------- 8846 8847 // LoadShuffle/Rearrange for Byte 8848 8849 instruct loadShuffleB(vec dst) %{ 8850 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8851 match(Set dst (VectorLoadShuffle dst)); 8852 format %{ "vector_load_shuffle $dst, $dst" %} 8853 ins_encode %{ 8854 // empty 8855 %} 8856 ins_pipe( pipe_slow ); 8857 %} 8858 8859 instruct rearrangeB(vec dst, vec shuffle) %{ 8860 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8861 Matcher::vector_length(n) < 32); 8862 match(Set dst (VectorRearrange dst shuffle)); 8863 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8864 ins_encode %{ 8865 assert(UseSSE >= 4, "required"); 8866 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8867 %} 8868 ins_pipe( pipe_slow ); 8869 %} 8870 8871 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8872 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8873 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8874 match(Set dst (VectorRearrange src shuffle)); 8875 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8876 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %} 8877 ins_encode %{ 8878 assert(UseAVX >= 2, "required"); 8879 // Swap src into vtmp1 8880 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8881 // Shuffle swapped src to get entries from other 128 bit lane 8882 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8883 // Shuffle original src to get entries from self 128 bit lane 8884 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8885 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8886 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8887 // Perform the blend 8888 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8889 %} 8890 ins_pipe( pipe_slow ); 8891 %} 8892 8893 8894 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8895 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8896 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8897 match(Set dst (VectorRearrange src shuffle)); 8898 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8899 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8900 ins_encode %{ 8901 int vlen_enc = vector_length_encoding(this); 8902 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8903 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8904 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8905 %} 8906 ins_pipe( pipe_slow ); 8907 %} 8908 8909 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8910 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8911 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8912 match(Set dst (VectorRearrange src shuffle)); 8913 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8914 ins_encode %{ 8915 int vlen_enc = vector_length_encoding(this); 8916 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8917 %} 8918 ins_pipe( pipe_slow ); 8919 %} 8920 8921 // LoadShuffle/Rearrange for Short 8922 8923 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8924 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8925 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8926 match(Set dst (VectorLoadShuffle src)); 8927 effect(TEMP dst, TEMP vtmp); 8928 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} 8929 ins_encode %{ 8930 // Create a byte shuffle mask from short shuffle mask 8931 // only byte shuffle instruction available on these platforms 8932 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8933 if (UseAVX == 0) { 8934 assert(vlen_in_bytes <= 16, "required"); 8935 // Multiply each shuffle by two to get byte index 8936 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8937 __ psllw($vtmp$$XMMRegister, 1); 8938 8939 // Duplicate to create 2 copies of byte index 8940 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8941 __ psllw($dst$$XMMRegister, 8); 8942 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8943 8944 // Add one to get alternate byte index 8945 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8946 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8947 } else { 8948 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8949 int vlen_enc = vector_length_encoding(this); 8950 // Multiply each shuffle by two to get byte index 8951 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8952 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8953 8954 // Duplicate to create 2 copies of byte index 8955 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8956 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8957 8958 // Add one to get alternate byte index 8959 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8960 } 8961 %} 8962 ins_pipe( pipe_slow ); 8963 %} 8964 8965 instruct rearrangeS(vec dst, vec shuffle) %{ 8966 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8967 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8968 match(Set dst (VectorRearrange dst shuffle)); 8969 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8970 ins_encode %{ 8971 assert(UseSSE >= 4, "required"); 8972 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8973 %} 8974 ins_pipe( pipe_slow ); 8975 %} 8976 8977 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8978 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8979 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8980 match(Set dst (VectorRearrange src shuffle)); 8981 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8982 format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %} 8983 ins_encode %{ 8984 assert(UseAVX >= 2, "required"); 8985 // Swap src into vtmp1 8986 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8987 // Shuffle swapped src to get entries from other 128 bit lane 8988 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8989 // Shuffle original src to get entries from self 128 bit lane 8990 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8991 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8992 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8993 // Perform the blend 8994 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8995 %} 8996 ins_pipe( pipe_slow ); 8997 %} 8998 8999 instruct loadShuffleS_evex(vec dst, vec src) %{ 9000 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9001 VM_Version::supports_avx512bw()); 9002 match(Set dst (VectorLoadShuffle src)); 9003 format %{ "vector_load_shuffle $dst, $src" %} 9004 ins_encode %{ 9005 int vlen_enc = vector_length_encoding(this); 9006 if (!VM_Version::supports_avx512vl()) { 9007 vlen_enc = Assembler::AVX_512bit; 9008 } 9009 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9010 %} 9011 ins_pipe( pipe_slow ); 9012 %} 9013 9014 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 9015 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9016 VM_Version::supports_avx512bw()); 9017 match(Set dst (VectorRearrange src shuffle)); 9018 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9019 ins_encode %{ 9020 int vlen_enc = vector_length_encoding(this); 9021 if (!VM_Version::supports_avx512vl()) { 9022 vlen_enc = Assembler::AVX_512bit; 9023 } 9024 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9025 %} 9026 ins_pipe( pipe_slow ); 9027 %} 9028 9029 // LoadShuffle/Rearrange for Integer and Float 9030 9031 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 9032 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9033 Matcher::vector_length(n) == 4 && UseAVX == 0); 9034 match(Set dst (VectorLoadShuffle src)); 9035 effect(TEMP dst, TEMP vtmp); 9036 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} 9037 ins_encode %{ 9038 assert(UseSSE >= 4, "required"); 9039 9040 // Create a byte shuffle mask from int shuffle mask 9041 // only byte shuffle instruction available on these platforms 9042 9043 // Duplicate and multiply each shuffle by 4 9044 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 9045 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9046 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9047 __ psllw($vtmp$$XMMRegister, 2); 9048 9049 // Duplicate again to create 4 copies of byte index 9050 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 9051 __ psllw($dst$$XMMRegister, 8); 9052 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 9053 9054 // Add 3,2,1,0 to get alternate byte index 9055 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 9056 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 9057 %} 9058 ins_pipe( pipe_slow ); 9059 %} 9060 9061 instruct rearrangeI(vec dst, vec shuffle) %{ 9062 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9063 UseAVX == 0); 9064 match(Set dst (VectorRearrange dst shuffle)); 9065 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9066 ins_encode %{ 9067 assert(UseSSE >= 4, "required"); 9068 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9069 %} 9070 ins_pipe( pipe_slow ); 9071 %} 9072 9073 instruct loadShuffleI_avx(vec dst, vec src) %{ 9074 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9075 UseAVX > 0); 9076 match(Set dst (VectorLoadShuffle src)); 9077 format %{ "vector_load_shuffle $dst, $src" %} 9078 ins_encode %{ 9079 int vlen_enc = vector_length_encoding(this); 9080 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9081 %} 9082 ins_pipe( pipe_slow ); 9083 %} 9084 9085 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 9086 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9087 UseAVX > 0); 9088 match(Set dst (VectorRearrange src shuffle)); 9089 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9090 ins_encode %{ 9091 int vlen_enc = vector_length_encoding(this); 9092 BasicType bt = Matcher::vector_element_basic_type(this); 9093 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9094 %} 9095 ins_pipe( pipe_slow ); 9096 %} 9097 9098 // LoadShuffle/Rearrange for Long and Double 9099 9100 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 9101 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9102 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9103 match(Set dst (VectorLoadShuffle src)); 9104 effect(TEMP dst, TEMP vtmp); 9105 format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %} 9106 ins_encode %{ 9107 assert(UseAVX >= 2, "required"); 9108 9109 int vlen_enc = vector_length_encoding(this); 9110 // Create a double word shuffle mask from long shuffle mask 9111 // only double word shuffle instruction available on these platforms 9112 9113 // Multiply each shuffle by two to get double word index 9114 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 9115 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 9116 9117 // Duplicate each double word shuffle 9118 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9119 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9120 9121 // Add one to get alternate double word index 9122 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9123 %} 9124 ins_pipe( pipe_slow ); 9125 %} 9126 9127 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9128 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9129 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9130 match(Set dst (VectorRearrange src shuffle)); 9131 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9132 ins_encode %{ 9133 assert(UseAVX >= 2, "required"); 9134 9135 int vlen_enc = vector_length_encoding(this); 9136 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9137 %} 9138 ins_pipe( pipe_slow ); 9139 %} 9140 9141 instruct loadShuffleL_evex(vec dst, vec src) %{ 9142 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9143 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9144 match(Set dst (VectorLoadShuffle src)); 9145 format %{ "vector_load_shuffle $dst, $src" %} 9146 ins_encode %{ 9147 assert(UseAVX > 2, "required"); 9148 9149 int vlen_enc = vector_length_encoding(this); 9150 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9151 %} 9152 ins_pipe( pipe_slow ); 9153 %} 9154 9155 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9156 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9157 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9158 match(Set dst (VectorRearrange src shuffle)); 9159 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9160 ins_encode %{ 9161 assert(UseAVX > 2, "required"); 9162 9163 int vlen_enc = vector_length_encoding(this); 9164 if (vlen_enc == Assembler::AVX_128bit) { 9165 vlen_enc = Assembler::AVX_256bit; 9166 } 9167 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9168 %} 9169 ins_pipe( pipe_slow ); 9170 %} 9171 9172 // --------------------------------- FMA -------------------------------------- 9173 // a * b + c 9174 9175 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9176 match(Set c (FmaVF c (Binary a b))); 9177 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9178 ins_cost(150); 9179 ins_encode %{ 9180 assert(UseFMA, "not enabled"); 9181 int vlen_enc = vector_length_encoding(this); 9182 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9183 %} 9184 ins_pipe( pipe_slow ); 9185 %} 9186 9187 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9188 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9189 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9190 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9191 ins_cost(150); 9192 ins_encode %{ 9193 assert(UseFMA, "not enabled"); 9194 int vlen_enc = vector_length_encoding(this); 9195 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9196 %} 9197 ins_pipe( pipe_slow ); 9198 %} 9199 9200 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9201 match(Set c (FmaVD c (Binary a b))); 9202 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9203 ins_cost(150); 9204 ins_encode %{ 9205 assert(UseFMA, "not enabled"); 9206 int vlen_enc = vector_length_encoding(this); 9207 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9208 %} 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9213 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9214 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9215 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9216 ins_cost(150); 9217 ins_encode %{ 9218 assert(UseFMA, "not enabled"); 9219 int vlen_enc = vector_length_encoding(this); 9220 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9221 %} 9222 ins_pipe( pipe_slow ); 9223 %} 9224 9225 // --------------------------------- Vector Multiply Add -------------------------------------- 9226 9227 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9228 predicate(UseAVX == 0); 9229 match(Set dst (MulAddVS2VI dst src1)); 9230 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9231 ins_encode %{ 9232 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9233 %} 9234 ins_pipe( pipe_slow ); 9235 %} 9236 9237 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9238 predicate(UseAVX > 0); 9239 match(Set dst (MulAddVS2VI src1 src2)); 9240 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9241 ins_encode %{ 9242 int vlen_enc = vector_length_encoding(this); 9243 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9244 %} 9245 ins_pipe( pipe_slow ); 9246 %} 9247 9248 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9249 9250 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9251 predicate(VM_Version::supports_avx512_vnni()); 9252 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9253 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9254 ins_encode %{ 9255 assert(UseAVX > 2, "required"); 9256 int vlen_enc = vector_length_encoding(this); 9257 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9258 %} 9259 ins_pipe( pipe_slow ); 9260 ins_cost(10); 9261 %} 9262 9263 // --------------------------------- PopCount -------------------------------------- 9264 9265 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9266 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9267 match(Set dst (PopCountVI src)); 9268 match(Set dst (PopCountVL src)); 9269 format %{ "vector_popcount_integral $dst, $src" %} 9270 ins_encode %{ 9271 int opcode = this->ideal_Opcode(); 9272 int vlen_enc = vector_length_encoding(this, $src); 9273 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9274 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9275 %} 9276 ins_pipe( pipe_slow ); 9277 %} 9278 9279 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9280 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9281 match(Set dst (PopCountVI src mask)); 9282 match(Set dst (PopCountVL src mask)); 9283 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9284 ins_encode %{ 9285 int vlen_enc = vector_length_encoding(this, $src); 9286 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9287 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9288 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9289 %} 9290 ins_pipe( pipe_slow ); 9291 %} 9292 9293 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9294 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9295 match(Set dst (PopCountVI src)); 9296 match(Set dst (PopCountVL src)); 9297 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9298 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9299 ins_encode %{ 9300 int opcode = this->ideal_Opcode(); 9301 int vlen_enc = vector_length_encoding(this, $src); 9302 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9303 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9304 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9305 %} 9306 ins_pipe( pipe_slow ); 9307 %} 9308 9309 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9310 9311 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9312 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9313 Matcher::vector_length_in_bytes(n->in(1)))); 9314 match(Set dst (CountTrailingZerosV src)); 9315 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9316 ins_cost(400); 9317 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9318 ins_encode %{ 9319 int vlen_enc = vector_length_encoding(this, $src); 9320 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9321 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9322 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9323 %} 9324 ins_pipe( pipe_slow ); 9325 %} 9326 9327 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9328 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9329 VM_Version::supports_avx512cd() && 9330 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9331 match(Set dst (CountTrailingZerosV src)); 9332 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9333 ins_cost(400); 9334 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9335 ins_encode %{ 9336 int vlen_enc = vector_length_encoding(this, $src); 9337 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9338 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9339 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9340 %} 9341 ins_pipe( pipe_slow ); 9342 %} 9343 9344 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9345 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9346 match(Set dst (CountTrailingZerosV src)); 9347 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9348 ins_cost(400); 9349 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9350 ins_encode %{ 9351 int vlen_enc = vector_length_encoding(this, $src); 9352 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9353 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9354 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9355 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9361 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9362 match(Set dst (CountTrailingZerosV src)); 9363 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9364 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9365 ins_encode %{ 9366 int vlen_enc = vector_length_encoding(this, $src); 9367 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9368 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9369 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9370 %} 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 9375 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9376 9377 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9378 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9379 effect(TEMP dst); 9380 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9381 ins_encode %{ 9382 int vector_len = vector_length_encoding(this); 9383 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9389 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9390 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9391 effect(TEMP dst); 9392 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9393 ins_encode %{ 9394 int vector_len = vector_length_encoding(this); 9395 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9396 %} 9397 ins_pipe( pipe_slow ); 9398 %} 9399 9400 // --------------------------------- Rotation Operations ---------------------------------- 9401 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9402 match(Set dst (RotateLeftV src shift)); 9403 match(Set dst (RotateRightV src shift)); 9404 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9405 ins_encode %{ 9406 int opcode = this->ideal_Opcode(); 9407 int vector_len = vector_length_encoding(this); 9408 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9409 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9410 %} 9411 ins_pipe( pipe_slow ); 9412 %} 9413 9414 instruct vprorate(vec dst, vec src, vec shift) %{ 9415 match(Set dst (RotateLeftV src shift)); 9416 match(Set dst (RotateRightV src shift)); 9417 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9418 ins_encode %{ 9419 int opcode = this->ideal_Opcode(); 9420 int vector_len = vector_length_encoding(this); 9421 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9422 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9423 %} 9424 ins_pipe( pipe_slow ); 9425 %} 9426 9427 // ---------------------------------- Masked Operations ------------------------------------ 9428 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9429 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9430 match(Set dst (LoadVectorMasked mem mask)); 9431 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9432 ins_encode %{ 9433 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9434 int vlen_enc = vector_length_encoding(this); 9435 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9436 %} 9437 ins_pipe( pipe_slow ); 9438 %} 9439 9440 9441 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9442 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9443 match(Set dst (LoadVectorMasked mem mask)); 9444 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9445 ins_encode %{ 9446 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9447 int vector_len = vector_length_encoding(this); 9448 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9449 %} 9450 ins_pipe( pipe_slow ); 9451 %} 9452 9453 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9454 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9455 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9456 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9457 ins_encode %{ 9458 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9459 int vlen_enc = vector_length_encoding(src_node); 9460 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9461 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9462 %} 9463 ins_pipe( pipe_slow ); 9464 %} 9465 9466 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9467 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9468 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9469 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9470 ins_encode %{ 9471 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9472 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9473 int vlen_enc = vector_length_encoding(src_node); 9474 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9475 %} 9476 ins_pipe( pipe_slow ); 9477 %} 9478 9479 #ifdef _LP64 9480 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9481 match(Set addr (VerifyVectorAlignment addr mask)); 9482 effect(KILL cr); 9483 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9484 ins_encode %{ 9485 Label Lskip; 9486 // check if masked bits of addr are zero 9487 __ testq($addr$$Register, $mask$$constant); 9488 __ jccb(Assembler::equal, Lskip); 9489 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9490 __ bind(Lskip); 9491 %} 9492 ins_pipe(pipe_slow); 9493 %} 9494 9495 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9496 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9497 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9498 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %} 9499 ins_encode %{ 9500 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9501 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9502 9503 Label DONE; 9504 int vlen_enc = vector_length_encoding(this, $src1); 9505 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9506 9507 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9508 __ mov64($dst$$Register, -1L); 9509 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9510 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9511 __ jccb(Assembler::carrySet, DONE); 9512 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9513 __ notq($dst$$Register); 9514 __ tzcntq($dst$$Register, $dst$$Register); 9515 __ bind(DONE); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 9521 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9522 match(Set dst (VectorMaskGen len)); 9523 effect(TEMP temp, KILL cr); 9524 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9525 ins_encode %{ 9526 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9527 %} 9528 ins_pipe( pipe_slow ); 9529 %} 9530 9531 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9532 match(Set dst (VectorMaskGen len)); 9533 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9534 effect(TEMP temp); 9535 ins_encode %{ 9536 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9537 __ kmovql($dst$$KRegister, $temp$$Register); 9538 %} 9539 ins_pipe( pipe_slow ); 9540 %} 9541 9542 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9543 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9544 match(Set dst (VectorMaskToLong mask)); 9545 effect(TEMP dst, KILL cr); 9546 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9547 ins_encode %{ 9548 int opcode = this->ideal_Opcode(); 9549 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9550 int mask_len = Matcher::vector_length(this, $mask); 9551 int mask_size = mask_len * type2aelembytes(mbt); 9552 int vlen_enc = vector_length_encoding(this, $mask); 9553 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9554 $dst$$Register, mask_len, mask_size, vlen_enc); 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9560 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9561 match(Set dst (VectorMaskToLong mask)); 9562 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9563 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9564 ins_encode %{ 9565 int opcode = this->ideal_Opcode(); 9566 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9567 int mask_len = Matcher::vector_length(this, $mask); 9568 int vlen_enc = vector_length_encoding(this, $mask); 9569 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9570 $dst$$Register, mask_len, mbt, vlen_enc); 9571 %} 9572 ins_pipe( pipe_slow ); 9573 %} 9574 9575 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9576 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9577 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9578 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %} 9579 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9580 ins_encode %{ 9581 int opcode = this->ideal_Opcode(); 9582 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9583 int mask_len = Matcher::vector_length(this, $mask); 9584 int vlen_enc = vector_length_encoding(this, $mask); 9585 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9586 $dst$$Register, mask_len, mbt, vlen_enc); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9592 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9593 match(Set dst (VectorMaskTrueCount mask)); 9594 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9595 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9596 ins_encode %{ 9597 int opcode = this->ideal_Opcode(); 9598 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9599 int mask_len = Matcher::vector_length(this, $mask); 9600 int mask_size = mask_len * type2aelembytes(mbt); 9601 int vlen_enc = vector_length_encoding(this, $mask); 9602 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9603 $tmp$$Register, mask_len, mask_size, vlen_enc); 9604 %} 9605 ins_pipe( pipe_slow ); 9606 %} 9607 9608 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9609 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9610 match(Set dst (VectorMaskTrueCount mask)); 9611 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9612 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9613 ins_encode %{ 9614 int opcode = this->ideal_Opcode(); 9615 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9616 int mask_len = Matcher::vector_length(this, $mask); 9617 int vlen_enc = vector_length_encoding(this, $mask); 9618 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9619 $tmp$$Register, mask_len, mbt, vlen_enc); 9620 %} 9621 ins_pipe( pipe_slow ); 9622 %} 9623 9624 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9625 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9626 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9627 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9628 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9629 ins_encode %{ 9630 int opcode = this->ideal_Opcode(); 9631 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9632 int mask_len = Matcher::vector_length(this, $mask); 9633 int vlen_enc = vector_length_encoding(this, $mask); 9634 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9635 $tmp$$Register, mask_len, mbt, vlen_enc); 9636 %} 9637 ins_pipe( pipe_slow ); 9638 %} 9639 9640 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9641 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9642 match(Set dst (VectorMaskFirstTrue mask)); 9643 match(Set dst (VectorMaskLastTrue mask)); 9644 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9645 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %} 9646 ins_encode %{ 9647 int opcode = this->ideal_Opcode(); 9648 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9649 int mask_len = Matcher::vector_length(this, $mask); 9650 int mask_size = mask_len * type2aelembytes(mbt); 9651 int vlen_enc = vector_length_encoding(this, $mask); 9652 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9653 $tmp$$Register, mask_len, mask_size, vlen_enc); 9654 %} 9655 ins_pipe( pipe_slow ); 9656 %} 9657 9658 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9659 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9660 match(Set dst (VectorMaskFirstTrue mask)); 9661 match(Set dst (VectorMaskLastTrue mask)); 9662 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9663 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9664 ins_encode %{ 9665 int opcode = this->ideal_Opcode(); 9666 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9667 int mask_len = Matcher::vector_length(this, $mask); 9668 int vlen_enc = vector_length_encoding(this, $mask); 9669 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9670 $tmp$$Register, mask_len, mbt, vlen_enc); 9671 %} 9672 ins_pipe( pipe_slow ); 9673 %} 9674 9675 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9676 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9677 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9678 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9679 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9680 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9681 ins_encode %{ 9682 int opcode = this->ideal_Opcode(); 9683 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9684 int mask_len = Matcher::vector_length(this, $mask); 9685 int vlen_enc = vector_length_encoding(this, $mask); 9686 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9687 $tmp$$Register, mask_len, mbt, vlen_enc); 9688 %} 9689 ins_pipe( pipe_slow ); 9690 %} 9691 9692 // --------------------------------- Compress/Expand Operations --------------------------- 9693 #ifdef _LP64 9694 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9695 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9696 match(Set dst (CompressV src mask)); 9697 match(Set dst (ExpandV src mask)); 9698 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9699 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9700 ins_encode %{ 9701 int opcode = this->ideal_Opcode(); 9702 int vlen_enc = vector_length_encoding(this); 9703 BasicType bt = Matcher::vector_element_basic_type(this); 9704 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9705 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9706 %} 9707 ins_pipe( pipe_slow ); 9708 %} 9709 #endif 9710 9711 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9712 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9713 match(Set dst (CompressV src mask)); 9714 match(Set dst (ExpandV src mask)); 9715 format %{ "vector_compress_expand $dst, $src, $mask" %} 9716 ins_encode %{ 9717 int opcode = this->ideal_Opcode(); 9718 int vector_len = vector_length_encoding(this); 9719 BasicType bt = Matcher::vector_element_basic_type(this); 9720 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9721 %} 9722 ins_pipe( pipe_slow ); 9723 %} 9724 9725 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9726 match(Set dst (CompressM mask)); 9727 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9728 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %} 9729 ins_encode %{ 9730 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9731 int mask_len = Matcher::vector_length(this); 9732 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9733 %} 9734 ins_pipe( pipe_slow ); 9735 %} 9736 9737 #endif // _LP64 9738 9739 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9740 9741 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9742 predicate(!VM_Version::supports_gfni()); 9743 match(Set dst (ReverseV src)); 9744 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9745 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9746 ins_encode %{ 9747 int vec_enc = vector_length_encoding(this); 9748 BasicType bt = Matcher::vector_element_basic_type(this); 9749 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9750 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9751 %} 9752 ins_pipe( pipe_slow ); 9753 %} 9754 9755 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9756 predicate(VM_Version::supports_gfni()); 9757 match(Set dst (ReverseV src)); 9758 effect(TEMP dst, TEMP xtmp); 9759 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9760 ins_encode %{ 9761 int vec_enc = vector_length_encoding(this); 9762 BasicType bt = Matcher::vector_element_basic_type(this); 9763 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9764 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9765 $xtmp$$XMMRegister); 9766 %} 9767 ins_pipe( pipe_slow ); 9768 %} 9769 9770 instruct vreverse_byte_reg(vec dst, vec src) %{ 9771 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9772 match(Set dst (ReverseBytesV src)); 9773 effect(TEMP dst); 9774 format %{ "vector_reverse_byte $dst, $src" %} 9775 ins_encode %{ 9776 int vec_enc = vector_length_encoding(this); 9777 BasicType bt = Matcher::vector_element_basic_type(this); 9778 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9779 %} 9780 ins_pipe( pipe_slow ); 9781 %} 9782 9783 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9784 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9785 match(Set dst (ReverseBytesV src)); 9786 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9787 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9788 ins_encode %{ 9789 int vec_enc = vector_length_encoding(this); 9790 BasicType bt = Matcher::vector_element_basic_type(this); 9791 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9792 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9793 %} 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9798 9799 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9800 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9801 Matcher::vector_length_in_bytes(n->in(1)))); 9802 match(Set dst (CountLeadingZerosV src)); 9803 format %{ "vector_count_leading_zeros $dst, $src" %} 9804 ins_encode %{ 9805 int vlen_enc = vector_length_encoding(this, $src); 9806 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9807 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9808 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9809 %} 9810 ins_pipe( pipe_slow ); 9811 %} 9812 9813 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9814 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9815 Matcher::vector_length_in_bytes(n->in(1)))); 9816 match(Set dst (CountLeadingZerosV src mask)); 9817 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9818 ins_encode %{ 9819 int vlen_enc = vector_length_encoding(this, $src); 9820 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9821 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9822 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9823 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9824 %} 9825 ins_pipe( pipe_slow ); 9826 %} 9827 9828 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9829 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9830 VM_Version::supports_avx512cd() && 9831 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9832 match(Set dst (CountLeadingZerosV src)); 9833 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9834 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9835 ins_encode %{ 9836 int vlen_enc = vector_length_encoding(this, $src); 9837 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9838 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9839 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9840 %} 9841 ins_pipe( pipe_slow ); 9842 %} 9843 9844 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9845 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9846 match(Set dst (CountLeadingZerosV src)); 9847 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9848 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9849 ins_encode %{ 9850 int vlen_enc = vector_length_encoding(this, $src); 9851 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9852 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9853 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9854 $rtmp$$Register, true, vlen_enc); 9855 %} 9856 ins_pipe( pipe_slow ); 9857 %} 9858 9859 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9860 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9861 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9862 match(Set dst (CountLeadingZerosV src)); 9863 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9864 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9865 ins_encode %{ 9866 int vlen_enc = vector_length_encoding(this, $src); 9867 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9868 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9869 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9870 %} 9871 ins_pipe( pipe_slow ); 9872 %} 9873 9874 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9875 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9876 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9877 match(Set dst (CountLeadingZerosV src)); 9878 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9879 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9880 ins_encode %{ 9881 int vlen_enc = vector_length_encoding(this, $src); 9882 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9883 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9884 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9885 %} 9886 ins_pipe( pipe_slow ); 9887 %} 9888 9889 // ---------------------------------- Vector Masked Operations ------------------------------------ 9890 9891 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9892 match(Set dst (AddVB (Binary dst src2) mask)); 9893 match(Set dst (AddVS (Binary dst src2) mask)); 9894 match(Set dst (AddVI (Binary dst src2) mask)); 9895 match(Set dst (AddVL (Binary dst src2) mask)); 9896 match(Set dst (AddVF (Binary dst src2) mask)); 9897 match(Set dst (AddVD (Binary dst src2) mask)); 9898 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9899 ins_encode %{ 9900 int vlen_enc = vector_length_encoding(this); 9901 BasicType bt = Matcher::vector_element_basic_type(this); 9902 int opc = this->ideal_Opcode(); 9903 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9904 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9905 %} 9906 ins_pipe( pipe_slow ); 9907 %} 9908 9909 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9910 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9911 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9912 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9913 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9914 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9915 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9916 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9917 ins_encode %{ 9918 int vlen_enc = vector_length_encoding(this); 9919 BasicType bt = Matcher::vector_element_basic_type(this); 9920 int opc = this->ideal_Opcode(); 9921 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9922 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9923 %} 9924 ins_pipe( pipe_slow ); 9925 %} 9926 9927 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9928 match(Set dst (XorV (Binary dst src2) mask)); 9929 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9930 ins_encode %{ 9931 int vlen_enc = vector_length_encoding(this); 9932 BasicType bt = Matcher::vector_element_basic_type(this); 9933 int opc = this->ideal_Opcode(); 9934 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9935 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9936 %} 9937 ins_pipe( pipe_slow ); 9938 %} 9939 9940 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9941 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9942 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9943 ins_encode %{ 9944 int vlen_enc = vector_length_encoding(this); 9945 BasicType bt = Matcher::vector_element_basic_type(this); 9946 int opc = this->ideal_Opcode(); 9947 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9948 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9949 %} 9950 ins_pipe( pipe_slow ); 9951 %} 9952 9953 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9954 match(Set dst (OrV (Binary dst src2) mask)); 9955 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9956 ins_encode %{ 9957 int vlen_enc = vector_length_encoding(this); 9958 BasicType bt = Matcher::vector_element_basic_type(this); 9959 int opc = this->ideal_Opcode(); 9960 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9961 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9962 %} 9963 ins_pipe( pipe_slow ); 9964 %} 9965 9966 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9967 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9968 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9969 ins_encode %{ 9970 int vlen_enc = vector_length_encoding(this); 9971 BasicType bt = Matcher::vector_element_basic_type(this); 9972 int opc = this->ideal_Opcode(); 9973 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9974 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9975 %} 9976 ins_pipe( pipe_slow ); 9977 %} 9978 9979 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9980 match(Set dst (AndV (Binary dst src2) mask)); 9981 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9982 ins_encode %{ 9983 int vlen_enc = vector_length_encoding(this); 9984 BasicType bt = Matcher::vector_element_basic_type(this); 9985 int opc = this->ideal_Opcode(); 9986 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9987 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9988 %} 9989 ins_pipe( pipe_slow ); 9990 %} 9991 9992 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9993 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9994 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9995 ins_encode %{ 9996 int vlen_enc = vector_length_encoding(this); 9997 BasicType bt = Matcher::vector_element_basic_type(this); 9998 int opc = this->ideal_Opcode(); 9999 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10000 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10001 %} 10002 ins_pipe( pipe_slow ); 10003 %} 10004 10005 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 10006 match(Set dst (SubVB (Binary dst src2) mask)); 10007 match(Set dst (SubVS (Binary dst src2) mask)); 10008 match(Set dst (SubVI (Binary dst src2) mask)); 10009 match(Set dst (SubVL (Binary dst src2) mask)); 10010 match(Set dst (SubVF (Binary dst src2) mask)); 10011 match(Set dst (SubVD (Binary dst src2) mask)); 10012 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 10013 ins_encode %{ 10014 int vlen_enc = vector_length_encoding(this); 10015 BasicType bt = Matcher::vector_element_basic_type(this); 10016 int opc = this->ideal_Opcode(); 10017 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10018 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10019 %} 10020 ins_pipe( pipe_slow ); 10021 %} 10022 10023 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 10024 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 10025 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 10026 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 10027 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 10028 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 10029 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 10030 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 10031 ins_encode %{ 10032 int vlen_enc = vector_length_encoding(this); 10033 BasicType bt = Matcher::vector_element_basic_type(this); 10034 int opc = this->ideal_Opcode(); 10035 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10036 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10037 %} 10038 ins_pipe( pipe_slow ); 10039 %} 10040 10041 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 10042 match(Set dst (MulVS (Binary dst src2) mask)); 10043 match(Set dst (MulVI (Binary dst src2) mask)); 10044 match(Set dst (MulVL (Binary dst src2) mask)); 10045 match(Set dst (MulVF (Binary dst src2) mask)); 10046 match(Set dst (MulVD (Binary dst src2) mask)); 10047 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10048 ins_encode %{ 10049 int vlen_enc = vector_length_encoding(this); 10050 BasicType bt = Matcher::vector_element_basic_type(this); 10051 int opc = this->ideal_Opcode(); 10052 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10053 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10054 %} 10055 ins_pipe( pipe_slow ); 10056 %} 10057 10058 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 10059 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 10060 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 10061 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 10062 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 10063 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 10064 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10065 ins_encode %{ 10066 int vlen_enc = vector_length_encoding(this); 10067 BasicType bt = Matcher::vector_element_basic_type(this); 10068 int opc = this->ideal_Opcode(); 10069 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10070 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10071 %} 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 10076 match(Set dst (SqrtVF dst mask)); 10077 match(Set dst (SqrtVD dst mask)); 10078 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %} 10079 ins_encode %{ 10080 int vlen_enc = vector_length_encoding(this); 10081 BasicType bt = Matcher::vector_element_basic_type(this); 10082 int opc = this->ideal_Opcode(); 10083 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10084 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10085 %} 10086 ins_pipe( pipe_slow ); 10087 %} 10088 10089 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 10090 match(Set dst (DivVF (Binary dst src2) mask)); 10091 match(Set dst (DivVD (Binary dst src2) mask)); 10092 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10093 ins_encode %{ 10094 int vlen_enc = vector_length_encoding(this); 10095 BasicType bt = Matcher::vector_element_basic_type(this); 10096 int opc = this->ideal_Opcode(); 10097 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10098 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10099 %} 10100 ins_pipe( pipe_slow ); 10101 %} 10102 10103 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 10104 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 10105 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 10106 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10107 ins_encode %{ 10108 int vlen_enc = vector_length_encoding(this); 10109 BasicType bt = Matcher::vector_element_basic_type(this); 10110 int opc = this->ideal_Opcode(); 10111 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10112 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10113 %} 10114 ins_pipe( pipe_slow ); 10115 %} 10116 10117 10118 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10119 match(Set dst (RotateLeftV (Binary dst shift) mask)); 10120 match(Set dst (RotateRightV (Binary dst shift) mask)); 10121 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 10122 ins_encode %{ 10123 int vlen_enc = vector_length_encoding(this); 10124 BasicType bt = Matcher::vector_element_basic_type(this); 10125 int opc = this->ideal_Opcode(); 10126 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10127 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10128 %} 10129 ins_pipe( pipe_slow ); 10130 %} 10131 10132 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 10133 match(Set dst (RotateLeftV (Binary dst src2) mask)); 10134 match(Set dst (RotateRightV (Binary dst src2) mask)); 10135 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 10136 ins_encode %{ 10137 int vlen_enc = vector_length_encoding(this); 10138 BasicType bt = Matcher::vector_element_basic_type(this); 10139 int opc = this->ideal_Opcode(); 10140 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10141 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10142 %} 10143 ins_pipe( pipe_slow ); 10144 %} 10145 10146 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10147 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 10148 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 10149 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 10150 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %} 10151 ins_encode %{ 10152 int vlen_enc = vector_length_encoding(this); 10153 BasicType bt = Matcher::vector_element_basic_type(this); 10154 int opc = this->ideal_Opcode(); 10155 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10156 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10157 %} 10158 ins_pipe( pipe_slow ); 10159 %} 10160 10161 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10162 predicate(!n->as_ShiftV()->is_var_shift()); 10163 match(Set dst (LShiftVS (Binary dst src2) mask)); 10164 match(Set dst (LShiftVI (Binary dst src2) mask)); 10165 match(Set dst (LShiftVL (Binary dst src2) mask)); 10166 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10167 ins_encode %{ 10168 int vlen_enc = vector_length_encoding(this); 10169 BasicType bt = Matcher::vector_element_basic_type(this); 10170 int opc = this->ideal_Opcode(); 10171 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10172 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10173 %} 10174 ins_pipe( pipe_slow ); 10175 %} 10176 10177 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10178 predicate(n->as_ShiftV()->is_var_shift()); 10179 match(Set dst (LShiftVS (Binary dst src2) mask)); 10180 match(Set dst (LShiftVI (Binary dst src2) mask)); 10181 match(Set dst (LShiftVL (Binary dst src2) mask)); 10182 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10183 ins_encode %{ 10184 int vlen_enc = vector_length_encoding(this); 10185 BasicType bt = Matcher::vector_element_basic_type(this); 10186 int opc = this->ideal_Opcode(); 10187 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10188 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10189 %} 10190 ins_pipe( pipe_slow ); 10191 %} 10192 10193 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10194 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10195 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10196 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10197 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10198 ins_encode %{ 10199 int vlen_enc = vector_length_encoding(this); 10200 BasicType bt = Matcher::vector_element_basic_type(this); 10201 int opc = this->ideal_Opcode(); 10202 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10203 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10204 %} 10205 ins_pipe( pipe_slow ); 10206 %} 10207 10208 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10209 predicate(!n->as_ShiftV()->is_var_shift()); 10210 match(Set dst (RShiftVS (Binary dst src2) mask)); 10211 match(Set dst (RShiftVI (Binary dst src2) mask)); 10212 match(Set dst (RShiftVL (Binary dst src2) mask)); 10213 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10214 ins_encode %{ 10215 int vlen_enc = vector_length_encoding(this); 10216 BasicType bt = Matcher::vector_element_basic_type(this); 10217 int opc = this->ideal_Opcode(); 10218 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10219 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10220 %} 10221 ins_pipe( pipe_slow ); 10222 %} 10223 10224 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10225 predicate(n->as_ShiftV()->is_var_shift()); 10226 match(Set dst (RShiftVS (Binary dst src2) mask)); 10227 match(Set dst (RShiftVI (Binary dst src2) mask)); 10228 match(Set dst (RShiftVL (Binary dst src2) mask)); 10229 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10230 ins_encode %{ 10231 int vlen_enc = vector_length_encoding(this); 10232 BasicType bt = Matcher::vector_element_basic_type(this); 10233 int opc = this->ideal_Opcode(); 10234 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10235 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10236 %} 10237 ins_pipe( pipe_slow ); 10238 %} 10239 10240 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10241 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10242 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10243 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10244 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10245 ins_encode %{ 10246 int vlen_enc = vector_length_encoding(this); 10247 BasicType bt = Matcher::vector_element_basic_type(this); 10248 int opc = this->ideal_Opcode(); 10249 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10250 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10251 %} 10252 ins_pipe( pipe_slow ); 10253 %} 10254 10255 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10256 predicate(!n->as_ShiftV()->is_var_shift()); 10257 match(Set dst (URShiftVS (Binary dst src2) mask)); 10258 match(Set dst (URShiftVI (Binary dst src2) mask)); 10259 match(Set dst (URShiftVL (Binary dst src2) mask)); 10260 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10261 ins_encode %{ 10262 int vlen_enc = vector_length_encoding(this); 10263 BasicType bt = Matcher::vector_element_basic_type(this); 10264 int opc = this->ideal_Opcode(); 10265 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10266 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10267 %} 10268 ins_pipe( pipe_slow ); 10269 %} 10270 10271 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10272 predicate(n->as_ShiftV()->is_var_shift()); 10273 match(Set dst (URShiftVS (Binary dst src2) mask)); 10274 match(Set dst (URShiftVI (Binary dst src2) mask)); 10275 match(Set dst (URShiftVL (Binary dst src2) mask)); 10276 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10277 ins_encode %{ 10278 int vlen_enc = vector_length_encoding(this); 10279 BasicType bt = Matcher::vector_element_basic_type(this); 10280 int opc = this->ideal_Opcode(); 10281 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10282 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10283 %} 10284 ins_pipe( pipe_slow ); 10285 %} 10286 10287 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10288 match(Set dst (MaxV (Binary dst src2) mask)); 10289 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10290 ins_encode %{ 10291 int vlen_enc = vector_length_encoding(this); 10292 BasicType bt = Matcher::vector_element_basic_type(this); 10293 int opc = this->ideal_Opcode(); 10294 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10295 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10296 %} 10297 ins_pipe( pipe_slow ); 10298 %} 10299 10300 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10301 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10302 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10303 ins_encode %{ 10304 int vlen_enc = vector_length_encoding(this); 10305 BasicType bt = Matcher::vector_element_basic_type(this); 10306 int opc = this->ideal_Opcode(); 10307 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10308 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10309 %} 10310 ins_pipe( pipe_slow ); 10311 %} 10312 10313 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10314 match(Set dst (MinV (Binary dst src2) mask)); 10315 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10316 ins_encode %{ 10317 int vlen_enc = vector_length_encoding(this); 10318 BasicType bt = Matcher::vector_element_basic_type(this); 10319 int opc = this->ideal_Opcode(); 10320 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10321 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10322 %} 10323 ins_pipe( pipe_slow ); 10324 %} 10325 10326 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10327 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10328 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10329 ins_encode %{ 10330 int vlen_enc = vector_length_encoding(this); 10331 BasicType bt = Matcher::vector_element_basic_type(this); 10332 int opc = this->ideal_Opcode(); 10333 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10334 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10335 %} 10336 ins_pipe( pipe_slow ); 10337 %} 10338 10339 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10340 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10341 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10342 ins_encode %{ 10343 int vlen_enc = vector_length_encoding(this); 10344 BasicType bt = Matcher::vector_element_basic_type(this); 10345 int opc = this->ideal_Opcode(); 10346 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10347 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10348 %} 10349 ins_pipe( pipe_slow ); 10350 %} 10351 10352 instruct vabs_masked(vec dst, kReg mask) %{ 10353 match(Set dst (AbsVB dst mask)); 10354 match(Set dst (AbsVS dst mask)); 10355 match(Set dst (AbsVI dst mask)); 10356 match(Set dst (AbsVL dst mask)); 10357 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10358 ins_encode %{ 10359 int vlen_enc = vector_length_encoding(this); 10360 BasicType bt = Matcher::vector_element_basic_type(this); 10361 int opc = this->ideal_Opcode(); 10362 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10363 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10364 %} 10365 ins_pipe( pipe_slow ); 10366 %} 10367 10368 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10369 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10370 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10371 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10372 ins_encode %{ 10373 assert(UseFMA, "Needs FMA instructions support."); 10374 int vlen_enc = vector_length_encoding(this); 10375 BasicType bt = Matcher::vector_element_basic_type(this); 10376 int opc = this->ideal_Opcode(); 10377 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10378 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 10379 %} 10380 ins_pipe( pipe_slow ); 10381 %} 10382 10383 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 10384 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 10385 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 10386 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10387 ins_encode %{ 10388 assert(UseFMA, "Needs FMA instructions support."); 10389 int vlen_enc = vector_length_encoding(this); 10390 BasicType bt = Matcher::vector_element_basic_type(this); 10391 int opc = this->ideal_Opcode(); 10392 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10393 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 10394 %} 10395 ins_pipe( pipe_slow ); 10396 %} 10397 10398 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10399 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10400 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10401 ins_encode %{ 10402 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10403 int vlen_enc = vector_length_encoding(this, $src1); 10404 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10405 10406 // Comparison i 10407 switch (src1_elem_bt) { 10408 case T_BYTE: { 10409 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10410 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10411 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10412 break; 10413 } 10414 case T_SHORT: { 10415 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10416 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10417 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10418 break; 10419 } 10420 case T_INT: { 10421 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10422 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10423 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10424 break; 10425 } 10426 case T_LONG: { 10427 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10428 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10429 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10430 break; 10431 } 10432 case T_FLOAT: { 10433 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10434 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10435 break; 10436 } 10437 case T_DOUBLE: { 10438 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10439 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10440 break; 10441 } 10442 default: assert(false, "%s", type2name(src1_elem_bt)); break; 10443 } 10444 %} 10445 ins_pipe( pipe_slow ); 10446 %} 10447 10448 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10449 predicate(Matcher::vector_length(n) <= 32); 10450 match(Set dst (MaskAll src)); 10451 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10452 ins_encode %{ 10453 int mask_len = Matcher::vector_length(this); 10454 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10455 %} 10456 ins_pipe( pipe_slow ); 10457 %} 10458 10459 #ifdef _LP64 10460 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10461 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10462 match(Set dst (XorVMask src (MaskAll cnt))); 10463 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10464 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10465 ins_encode %{ 10466 uint masklen = Matcher::vector_length(this); 10467 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10468 %} 10469 ins_pipe( pipe_slow ); 10470 %} 10471 10472 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10473 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10474 (Matcher::vector_length(n) == 16) || 10475 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10476 match(Set dst (XorVMask src (MaskAll cnt))); 10477 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10478 ins_encode %{ 10479 uint masklen = Matcher::vector_length(this); 10480 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10481 %} 10482 ins_pipe( pipe_slow ); 10483 %} 10484 10485 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10486 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10487 match(Set dst (VectorLongToMask src)); 10488 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10489 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10490 ins_encode %{ 10491 int mask_len = Matcher::vector_length(this); 10492 int vec_enc = vector_length_encoding(mask_len); 10493 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10494 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10495 %} 10496 ins_pipe( pipe_slow ); 10497 %} 10498 10499 10500 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10501 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10502 match(Set dst (VectorLongToMask src)); 10503 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10504 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10505 ins_encode %{ 10506 int mask_len = Matcher::vector_length(this); 10507 assert(mask_len <= 32, "invalid mask length"); 10508 int vec_enc = vector_length_encoding(mask_len); 10509 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10510 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10511 %} 10512 ins_pipe( pipe_slow ); 10513 %} 10514 10515 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10516 predicate(n->bottom_type()->isa_vectmask()); 10517 match(Set dst (VectorLongToMask src)); 10518 format %{ "long_to_mask_evex $dst, $src\t!" %} 10519 ins_encode %{ 10520 __ kmov($dst$$KRegister, $src$$Register); 10521 %} 10522 ins_pipe( pipe_slow ); 10523 %} 10524 #endif 10525 10526 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10527 match(Set dst (AndVMask src1 src2)); 10528 match(Set dst (OrVMask src1 src2)); 10529 match(Set dst (XorVMask src1 src2)); 10530 effect(TEMP kscratch); 10531 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10532 ins_encode %{ 10533 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10534 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10535 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10536 uint masklen = Matcher::vector_length(this); 10537 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10538 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10539 %} 10540 ins_pipe( pipe_slow ); 10541 %} 10542 10543 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10544 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10545 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10546 ins_encode %{ 10547 int vlen_enc = vector_length_encoding(this); 10548 BasicType bt = Matcher::vector_element_basic_type(this); 10549 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10550 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10551 %} 10552 ins_pipe( pipe_slow ); 10553 %} 10554 10555 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10556 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10557 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10558 ins_encode %{ 10559 int vlen_enc = vector_length_encoding(this); 10560 BasicType bt = Matcher::vector_element_basic_type(this); 10561 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10562 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10563 %} 10564 ins_pipe( pipe_slow ); 10565 %} 10566 10567 instruct castMM(kReg dst) 10568 %{ 10569 match(Set dst (CastVV dst)); 10570 10571 size(0); 10572 format %{ "# castVV of $dst" %} 10573 ins_encode(/* empty encoding */); 10574 ins_cost(0); 10575 ins_pipe(empty); 10576 %} 10577 10578 instruct castVV(vec dst) 10579 %{ 10580 match(Set dst (CastVV dst)); 10581 10582 size(0); 10583 format %{ "# castVV of $dst" %} 10584 ins_encode(/* empty encoding */); 10585 ins_cost(0); 10586 ins_pipe(empty); 10587 %} 10588 10589 instruct castVVLeg(legVec dst) 10590 %{ 10591 match(Set dst (CastVV dst)); 10592 10593 size(0); 10594 format %{ "# castVV of $dst" %} 10595 ins_encode(/* empty encoding */); 10596 ins_cost(0); 10597 ins_pipe(empty); 10598 %} 10599 10600 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10601 %{ 10602 match(Set dst (IsInfiniteF src)); 10603 effect(TEMP ktmp, KILL cr); 10604 format %{ "float_class_check $dst, $src" %} 10605 ins_encode %{ 10606 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10607 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10608 %} 10609 ins_pipe(pipe_slow); 10610 %} 10611 10612 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10613 %{ 10614 match(Set dst (IsInfiniteD src)); 10615 effect(TEMP ktmp, KILL cr); 10616 format %{ "double_class_check $dst, $src" %} 10617 ins_encode %{ 10618 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10619 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10620 %} 10621 ins_pipe(pipe_slow); 10622 %} 10623 10624 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10625 %{ 10626 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10627 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10628 match(Set dst (SaturatingAddV src1 src2)); 10629 match(Set dst (SaturatingSubV src1 src2)); 10630 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10631 ins_encode %{ 10632 int vlen_enc = vector_length_encoding(this); 10633 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10634 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10635 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10636 %} 10637 ins_pipe(pipe_slow); 10638 %} 10639 10640 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10641 %{ 10642 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10643 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10644 match(Set dst (SaturatingAddV src1 src2)); 10645 match(Set dst (SaturatingSubV src1 src2)); 10646 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10647 ins_encode %{ 10648 int vlen_enc = vector_length_encoding(this); 10649 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10650 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10651 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10652 %} 10653 ins_pipe(pipe_slow); 10654 %} 10655 10656 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10657 %{ 10658 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10659 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10660 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10661 match(Set dst (SaturatingAddV src1 src2)); 10662 match(Set dst (SaturatingSubV src1 src2)); 10663 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10664 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10665 ins_encode %{ 10666 int vlen_enc = vector_length_encoding(this); 10667 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10668 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10669 $src1$$XMMRegister, $src2$$XMMRegister, 10670 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10671 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10672 %} 10673 ins_pipe(pipe_slow); 10674 %} 10675 10676 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10677 %{ 10678 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10679 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10680 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10681 match(Set dst (SaturatingAddV src1 src2)); 10682 match(Set dst (SaturatingSubV src1 src2)); 10683 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10684 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10685 ins_encode %{ 10686 int vlen_enc = vector_length_encoding(this); 10687 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10688 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10689 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10690 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10691 %} 10692 ins_pipe(pipe_slow); 10693 %} 10694 10695 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10696 %{ 10697 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10698 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10699 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10700 match(Set dst (SaturatingAddV src1 src2)); 10701 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10702 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10703 ins_encode %{ 10704 int vlen_enc = vector_length_encoding(this); 10705 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10706 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10707 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10708 %} 10709 ins_pipe(pipe_slow); 10710 %} 10711 10712 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10713 %{ 10714 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10715 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10716 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10717 match(Set dst (SaturatingAddV src1 src2)); 10718 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10719 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10720 ins_encode %{ 10721 int vlen_enc = vector_length_encoding(this); 10722 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10723 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10724 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10725 %} 10726 ins_pipe(pipe_slow); 10727 %} 10728 10729 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10730 %{ 10731 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10732 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10733 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10734 match(Set dst (SaturatingSubV src1 src2)); 10735 effect(TEMP ktmp); 10736 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10737 ins_encode %{ 10738 int vlen_enc = vector_length_encoding(this); 10739 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10740 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10741 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10742 %} 10743 ins_pipe(pipe_slow); 10744 %} 10745 10746 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10747 %{ 10748 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10749 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10750 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10751 match(Set dst (SaturatingSubV src1 src2)); 10752 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10753 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %} 10754 ins_encode %{ 10755 int vlen_enc = vector_length_encoding(this); 10756 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10757 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10758 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10759 %} 10760 ins_pipe(pipe_slow); 10761 %} 10762 10763 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10764 %{ 10765 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10766 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10767 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10768 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10769 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10770 ins_encode %{ 10771 int vlen_enc = vector_length_encoding(this); 10772 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10773 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10774 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10775 %} 10776 ins_pipe(pipe_slow); 10777 %} 10778 10779 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10780 %{ 10781 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10782 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10783 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10784 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10785 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10786 ins_encode %{ 10787 int vlen_enc = vector_length_encoding(this); 10788 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10789 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10790 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10791 %} 10792 ins_pipe(pipe_slow); 10793 %} 10794 10795 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10796 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10797 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10798 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10799 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10800 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10801 ins_encode %{ 10802 int vlen_enc = vector_length_encoding(this); 10803 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10804 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10805 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10806 %} 10807 ins_pipe( pipe_slow ); 10808 %} 10809 10810 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10811 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10812 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10813 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10814 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10815 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10816 ins_encode %{ 10817 int vlen_enc = vector_length_encoding(this); 10818 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10819 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10820 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10821 %} 10822 ins_pipe( pipe_slow ); 10823 %} 10824 10825 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10826 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10827 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10828 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10829 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10830 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10831 ins_encode %{ 10832 int vlen_enc = vector_length_encoding(this); 10833 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10834 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10835 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10836 %} 10837 ins_pipe( pipe_slow ); 10838 %} 10839 10840 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10841 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10842 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10843 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10844 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10845 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10846 ins_encode %{ 10847 int vlen_enc = vector_length_encoding(this); 10848 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10849 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10850 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10851 %} 10852 ins_pipe( pipe_slow ); 10853 %} 10854 10855 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10856 %{ 10857 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10858 format %{ "select_from_two_vector $index, $src1, $src2 \t!" %} 10859 ins_encode %{ 10860 int vlen_enc = vector_length_encoding(this); 10861 BasicType bt = Matcher::vector_element_basic_type(this); 10862 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10863 %} 10864 ins_pipe(pipe_slow); 10865 %}