//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
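// As an illustrative reading aid (not itself a definition; the real entries
// follow below), the first slot of XMM0 is declared as
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// i.e. register save type SOC (Save-On-Call), C convention save type SOC,
// ideal register type Op_RegF, encoding 0, and the concrete VMReg the slot
// maps to.  The b..p entries name the remaining 32-bit words of the same
// 512-bit register.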
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
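// For instance, HandlerImpl below only declares emit_exception_handler() and
// emit_deopt_handler() in this source_hpp block; their definitions appear further
// down in the source %{ }% block.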
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // number of relocations needed by a call trampoline stub 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(C2_MacroAssembler *masm); 1191 static int emit_deopt_handler(C2_MacroAssembler* masm); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // exception handler starts out as jump and can be patched to 1196 // a call by deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5 byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // exception handler starts out as jump and can be patched to 1211 // a call by deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch(bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1250 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1251 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1252 } 1253 1254 class Node::PD { 1255 public: 1256 enum NodeFlags { 1257 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1258 Flag_sets_carry_flag = Node::_last_flag << 2, 1259 Flag_sets_parity_flag = Node::_last_flag << 3, 1260 Flag_sets_zero_flag = Node::_last_flag << 4, 1261 Flag_sets_overflow_flag = Node::_last_flag << 5, 1262 Flag_sets_sign_flag = Node::_last_flag << 6, 1263 Flag_clears_carry_flag = Node::_last_flag << 7, 1264 Flag_clears_parity_flag = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 
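  // Sketch of the trick used below (illustrative, not emitted code): a near call to
  // the immediately following label "next" pushes its own return address -- the
  // address of "next" -- onto the stack without touching any general register.
  // The pushed slot is then adjusted in place by the distance from the handler
  // start to "next", so it ends up holding "the_pc":
  //
  //   call next                            ; pushes address of "next"
  //   next:
  //   sub qword ptr [rsp], next - the_pc   ; [rsp] now holds "the_pc"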
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 
if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 
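// For example (illustrative): a call with vlen == 8 and bt == T_INT describes an
// 8 x 4-byte vector, i.e. size_in_bits = 8 * 4 * 8 = 256, so the AVX2-guarded
// checks in the switch below apply to the 256-bit forms of those nodes.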
1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_MaxV: 1769 case Op_MinV: 1770 if (UseSSE < 4 && is_integral_type(bt)) { 1771 return false; 1772 } 1773 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1774 // Float/Double intrinsics are enabled for AVX family currently. 
1775 if (UseAVX == 0) { 1776 return false; 1777 } 1778 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1779 return false; 1780 } 1781 } 1782 break; 1783 case Op_CallLeafVector: 1784 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1785 return false; 1786 } 1787 break; 1788 case Op_AddReductionVI: 1789 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1790 return false; 1791 } 1792 // fallthrough 1793 case Op_AndReductionV: 1794 case Op_OrReductionV: 1795 case Op_XorReductionV: 1796 if (is_subword_type(bt) && (UseSSE < 4)) { 1797 return false; 1798 } 1799 #ifndef _LP64 1800 if (bt == T_BYTE || bt == T_LONG) { 1801 return false; 1802 } 1803 #endif 1804 break; 1805 #ifndef _LP64 1806 case Op_VectorInsert: 1807 if (bt == T_LONG || bt == T_DOUBLE) { 1808 return false; 1809 } 1810 break; 1811 #endif 1812 case Op_MinReductionV: 1813 case Op_MaxReductionV: 1814 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1815 return false; 1816 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1817 return false; 1818 } 1819 // Float/Double intrinsics enabled for AVX family. 1820 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1821 return false; 1822 } 1823 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1824 return false; 1825 } 1826 #ifndef _LP64 1827 if (bt == T_BYTE || bt == T_LONG) { 1828 return false; 1829 } 1830 #endif 1831 break; 1832 case Op_VectorTest: 1833 if (UseSSE < 4) { 1834 return false; // Implementation limitation 1835 } else if (size_in_bits < 32) { 1836 return false; // Implementation limitation 1837 } 1838 break; 1839 case Op_VectorLoadShuffle: 1840 case Op_VectorRearrange: 1841 if(vlen == 2) { 1842 return false; // Implementation limitation due to how shuffle is loaded 1843 } else if (size_in_bits == 256 && UseAVX < 2) { 1844 return false; // Implementation limitation 1845 } 1846 break; 1847 case Op_VectorLoadMask: 1848 case Op_VectorMaskCast: 1849 if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 // fallthrough 1853 case Op_VectorStoreMask: 1854 if (vlen == 2) { 1855 return false; // Implementation limitation 1856 } 1857 break; 1858 case Op_PopulateIndex: 1859 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1860 return false; 1861 } 1862 break; 1863 case Op_VectorCastB2X: 1864 case Op_VectorCastS2X: 1865 case Op_VectorCastI2X: 1866 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1867 return false; 1868 } 1869 break; 1870 case Op_VectorCastL2X: 1871 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1872 return false; 1873 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1874 return false; 1875 } 1876 break; 1877 case Op_VectorCastF2X: { 1878 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1879 // happen after intermediate conversion to integer and special handling 1880 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
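      // Worked example (illustrative): for an 8-element float source the computation
      // below gives 4 bytes * 8 * 8 bits = 256, so casting to an integral type
      // without AVX2 is rejected.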
1881 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1882 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1883 return false; 1884 } 1885 } 1886 // fallthrough 1887 case Op_VectorCastD2X: 1888 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1889 return false; 1890 } 1891 break; 1892 case Op_VectorCastF2HF: 1893 case Op_VectorCastHF2F: 1894 if (!VM_Version::supports_f16c() && 1895 ((!VM_Version::supports_evex() || 1896 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1897 return false; 1898 } 1899 break; 1900 case Op_RoundVD: 1901 if (!VM_Version::supports_avx512dq()) { 1902 return false; 1903 } 1904 break; 1905 case Op_MulReductionVI: 1906 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1907 return false; 1908 } 1909 break; 1910 case Op_LoadVectorGatherMasked: 1911 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1912 return false; 1913 } 1914 if (is_subword_type(bt) && 1915 (!is_LP64 || 1916 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1917 (size_in_bits < 64) || 1918 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1919 return false; 1920 } 1921 break; 1922 case Op_StoreVectorScatterMasked: 1923 case Op_StoreVectorScatter: 1924 if (is_subword_type(bt)) { 1925 return false; 1926 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 // fallthrough 1930 case Op_LoadVectorGather: 1931 if (!is_subword_type(bt) && size_in_bits == 64) { 1932 return false; 1933 } 1934 if (is_subword_type(bt) && size_in_bits < 64) { 1935 return false; 1936 } 1937 break; 1938 case Op_SelectFromTwoVector: 1939 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1940 return false; 1941 } 1942 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1943 return false; 1944 } 1945 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1946 return false; 1947 } 1948 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1949 return false; 1950 } 1951 break; 1952 case Op_MaskAll: 1953 if (!VM_Version::supports_evex()) { 1954 return false; 1955 } 1956 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1957 return false; 1958 } 1959 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1960 return false; 1961 } 1962 break; 1963 case Op_VectorMaskCmp: 1964 if (vlen < 2 || size_in_bits < 32) { 1965 return false; 1966 } 1967 break; 1968 case Op_CompressM: 1969 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1970 return false; 1971 } 1972 break; 1973 case Op_CompressV: 1974 case Op_ExpandV: 1975 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1976 return false; 1977 } 1978 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1979 return false; 1980 } 1981 if (size_in_bits < 128 ) { 1982 return false; 1983 } 1984 case Op_VectorLongToMask: 1985 if (UseAVX < 1 || !is_LP64) { 1986 return false; 1987 } 1988 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1989 return false; 1990 } 1991 break; 1992 case Op_SignumVD: 1993 case Op_SignumVF: 1994 if (UseAVX < 1) { 1995 return false; 1996 } 1997 break; 1998 case Op_PopCountVI: 1999 case Op_PopCountVL: { 2000 if (!is_pop_count_instr_target(bt) && 2001 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2002 return false; 2003 } 2004 } 2005 break; 2006 case Op_ReverseV: 2007 case Op_ReverseBytesV: 2008 if (UseAVX < 2) { 2009 return false; 
2010 } 2011 break; 2012 case Op_CountTrailingZerosV: 2013 case Op_CountLeadingZerosV: 2014 if (UseAVX < 2) { 2015 return false; 2016 } 2017 break; 2018 } 2019 return true; // Per default match rules are supported. 2020 } 2021 2022 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2023 // ADLC based match_rule_supported routine checks for the existence of pattern based 2024 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2025 // of their non-masked counterpart with mask edge being the differentiator. 2026 // This routine does a strict check on the existence of masked operation patterns 2027 // by returning a default false value for all the other opcodes apart from the 2028 // ones whose masked instruction patterns are defined in this file. 2029 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2030 return false; 2031 } 2032 2033 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2034 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2035 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2036 return false; 2037 } 2038 switch(opcode) { 2039 // Unary masked operations 2040 case Op_AbsVB: 2041 case Op_AbsVS: 2042 if(!VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 case Op_AbsVI: 2046 case Op_AbsVL: 2047 return true; 2048 2049 // Ternary masked operations 2050 case Op_FmaVF: 2051 case Op_FmaVD: 2052 return true; 2053 2054 case Op_MacroLogicV: 2055 if(bt != T_INT && bt != T_LONG) { 2056 return false; 2057 } 2058 return true; 2059 2060 // Binary masked operations 2061 case Op_AddVB: 2062 case Op_AddVS: 2063 case Op_SubVB: 2064 case Op_SubVS: 2065 case Op_MulVS: 2066 case Op_LShiftVS: 2067 case Op_RShiftVS: 2068 case Op_URShiftVS: 2069 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2070 if (!VM_Version::supports_avx512bw()) { 2071 return false; // Implementation limitation 2072 } 2073 return true; 2074 2075 case Op_MulVL: 2076 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2077 if (!VM_Version::supports_avx512dq()) { 2078 return false; // Implementation limitation 2079 } 2080 return true; 2081 2082 case Op_AndV: 2083 case Op_OrV: 2084 case Op_XorV: 2085 case Op_RotateRightV: 2086 case Op_RotateLeftV: 2087 if (bt != T_INT && bt != T_LONG) { 2088 return false; // Implementation limitation 2089 } 2090 return true; 2091 2092 case Op_VectorLoadMask: 2093 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2094 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2095 return false; 2096 } 2097 return true; 2098 2099 case Op_AddVI: 2100 case Op_AddVL: 2101 case Op_AddVF: 2102 case Op_AddVD: 2103 case Op_SubVI: 2104 case Op_SubVL: 2105 case Op_SubVF: 2106 case Op_SubVD: 2107 case Op_MulVI: 2108 case Op_MulVF: 2109 case Op_MulVD: 2110 case Op_DivVF: 2111 case Op_DivVD: 2112 case Op_SqrtVF: 2113 case Op_SqrtVD: 2114 case Op_LShiftVI: 2115 case Op_LShiftVL: 2116 case Op_RShiftVI: 2117 case Op_RShiftVL: 2118 case Op_URShiftVI: 2119 case Op_URShiftVL: 2120 case Op_LoadVectorMasked: 2121 case Op_StoreVectorMasked: 2122 case Op_LoadVectorGatherMasked: 2123 case Op_StoreVectorScatterMasked: 2124 return true; 2125 2126 case Op_MaxV: 2127 case Op_MinV: 2128 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2129 return false; // Implementation limitation 2130 } 2131 if (is_floating_point_type(bt)) { 2132 return false; // Implementation limitation 2133 } 2134 return true; 2135 2136 case 
Op_VectorMaskCmp: 2137 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2138 return false; // Implementation limitation 2139 } 2140 return true; 2141 2142 case Op_VectorRearrange: 2143 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2144 return false; // Implementation limitation 2145 } 2146 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2147 return false; // Implementation limitation 2148 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2149 return false; // Implementation limitation 2150 } 2151 return true; 2152 2153 // Binary Logical operations 2154 case Op_AndVMask: 2155 case Op_OrVMask: 2156 case Op_XorVMask: 2157 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2158 return false; // Implementation limitation 2159 } 2160 return true; 2161 2162 case Op_PopCountVI: 2163 case Op_PopCountVL: 2164 if (!is_pop_count_instr_target(bt)) { 2165 return false; 2166 } 2167 return true; 2168 2169 case Op_MaskAll: 2170 return true; 2171 2172 case Op_CountLeadingZerosV: 2173 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2174 return true; 2175 } 2176 default: 2177 return false; 2178 } 2179 } 2180 2181 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2182 return false; 2183 } 2184 2185 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2186 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2187 bool legacy = (generic_opnd->opcode() == LEGVEC); 2188 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2189 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2190 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2191 return new legVecZOper(); 2192 } 2193 if (legacy) { 2194 switch (ideal_reg) { 2195 case Op_VecS: return new legVecSOper(); 2196 case Op_VecD: return new legVecDOper(); 2197 case Op_VecX: return new legVecXOper(); 2198 case Op_VecY: return new legVecYOper(); 2199 case Op_VecZ: return new legVecZOper(); 2200 } 2201 } else { 2202 switch (ideal_reg) { 2203 case Op_VecS: return new vecSOper(); 2204 case Op_VecD: return new vecDOper(); 2205 case Op_VecX: return new vecXOper(); 2206 case Op_VecY: return new vecYOper(); 2207 case Op_VecZ: return new vecZOper(); 2208 } 2209 } 2210 ShouldNotReachHere(); 2211 return nullptr; 2212 } 2213 2214 bool Matcher::is_reg2reg_move(MachNode* m) { 2215 switch (m->rule()) { 2216 case MoveVec2Leg_rule: 2217 case MoveLeg2Vec_rule: 2218 case MoveF2VL_rule: 2219 case MoveF2LEG_rule: 2220 case MoveVL2F_rule: 2221 case MoveLEG2F_rule: 2222 case MoveD2VL_rule: 2223 case MoveD2LEG_rule: 2224 case MoveVL2D_rule: 2225 case MoveLEG2D_rule: 2226 return true; 2227 default: 2228 return false; 2229 } 2230 } 2231 2232 bool Matcher::is_generic_vector(MachOper* opnd) { 2233 switch (opnd->opcode()) { 2234 case VEC: 2235 case LEGVEC: 2236 return true; 2237 default: 2238 return false; 2239 } 2240 } 2241 2242 //------------------------------------------------------------------------ 2243 2244 const RegMask* Matcher::predicate_reg_mask(void) { 2245 return &_VECTMASK_REG_mask; 2246 } 2247 2248 // Max vector size in bytes. 0 if not supported. 2249 int Matcher::vector_width_in_bytes(BasicType bt) { 2250 assert(is_java_primitive(bt), "only primitive type vectors"); 2251 if (UseSSE < 2) return 0; 2252 // SSE2 supports 128bit vectors for all types. 2253 // AVX2 supports 256bit vectors for all types. 2254 // AVX2/EVEX supports 512bit vectors for all types. 
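  // Illustrative values for the formula below: with UseAVX == 2 the base size is
  // (1 << 2) * 8 = 32 bytes (256-bit), with UseAVX == 3 it is (1 << 3) * 8 = 64 bytes
  // (512-bit); otherwise it defaults to 16 bytes (128-bit), subject to the
  // per-type and MaxVectorSize adjustments that follow.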
2255 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2256 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2257 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2258 size = (UseAVX > 2) ? 64 : 32; 2259 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2260 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2261 // Use flag to limit vector size. 2262 size = MIN2(size,(int)MaxVectorSize); 2263 // Minimum 2 values in vector (or 4 for bytes). 2264 switch (bt) { 2265 case T_DOUBLE: 2266 case T_LONG: 2267 if (size < 16) return 0; 2268 break; 2269 case T_FLOAT: 2270 case T_INT: 2271 if (size < 8) return 0; 2272 break; 2273 case T_BOOLEAN: 2274 if (size < 4) return 0; 2275 break; 2276 case T_CHAR: 2277 if (size < 4) return 0; 2278 break; 2279 case T_BYTE: 2280 if (size < 4) return 0; 2281 break; 2282 case T_SHORT: 2283 if (size < 4) return 0; 2284 break; 2285 default: 2286 ShouldNotReachHere(); 2287 } 2288 return size; 2289 } 2290 2291 // Limits on vector size (number of elements) loaded into vector. 2292 int Matcher::max_vector_size(const BasicType bt) { 2293 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2294 } 2295 int Matcher::min_vector_size(const BasicType bt) { 2296 int max_size = max_vector_size(bt); 2297 // Min size which can be loaded into vector is 4 bytes. 2298 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2299 // Support for calling svml double64 vectors 2300 if (bt == T_DOUBLE) { 2301 size = 1; 2302 } 2303 return MIN2(size,max_size); 2304 } 2305 2306 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2307 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2308 // by default on Cascade Lake 2309 if (VM_Version::is_default_intel_cascade_lake()) { 2310 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2311 } 2312 return Matcher::max_vector_size(bt); 2313 } 2314 2315 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2316 return -1; 2317 } 2318 2319 // Vector ideal reg corresponding to specified size in bytes 2320 uint Matcher::vector_ideal_reg(int size) { 2321 assert(MaxVectorSize >= size, ""); 2322 switch(size) { 2323 case 4: return Op_VecS; 2324 case 8: return Op_VecD; 2325 case 16: return Op_VecX; 2326 case 32: return Op_VecY; 2327 case 64: return Op_VecZ; 2328 } 2329 ShouldNotReachHere(); 2330 return 0; 2331 } 2332 2333 // Check for shift by small constant as well 2334 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2335 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2336 shift->in(2)->get_int() <= 3 && 2337 // Are there other uses besides address expressions? 2338 !matcher->is_visited(shift)) { 2339 address_visited.set(shift->_idx); // Flag as address_visited 2340 mstack.push(shift->in(2), Matcher::Visit); 2341 Node *conv = shift->in(1); 2342 #ifdef _LP64 2343 // Allow Matcher to match the rule which bypass 2344 // ConvI2L operation for an array index on LP64 2345 // if the index value is positive. 2346 if (conv->Opcode() == Op_ConvI2L && 2347 conv->as_Type()->type()->is_long()->_lo >= 0 && 2348 // Are there other uses besides address expressions? 
2349 !matcher->is_visited(conv)) { 2350 address_visited.set(conv->_idx); // Flag as address_visited 2351 mstack.push(conv->in(1), Matcher::Pre_Visit); 2352 } else 2353 #endif 2354 mstack.push(conv, Matcher::Pre_Visit); 2355 return true; 2356 } 2357 return false; 2358 } 2359 2360 // This function identifies sub-graphs in which a 'load' node is 2361 // input to two different nodes, and such that it can be matched 2362 // with BMI instructions like blsi, blsr, etc. 2363 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2364 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2365 // refers to the same node. 2366 // 2367 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2368 // This is a temporary solution until we make DAGs expressible in ADL. 2369 template<typename ConType> 2370 class FusedPatternMatcher { 2371 Node* _op1_node; 2372 Node* _mop_node; 2373 int _con_op; 2374 2375 static int match_next(Node* n, int next_op, int next_op_idx) { 2376 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2377 return -1; 2378 } 2379 2380 if (next_op_idx == -1) { // n is commutative, try rotations 2381 if (n->in(1)->Opcode() == next_op) { 2382 return 1; 2383 } else if (n->in(2)->Opcode() == next_op) { 2384 return 2; 2385 } 2386 } else { 2387 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2388 if (n->in(next_op_idx)->Opcode() == next_op) { 2389 return next_op_idx; 2390 } 2391 } 2392 return -1; 2393 } 2394 2395 public: 2396 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2397 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2398 2399 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2400 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2401 typename ConType::NativeType con_value) { 2402 if (_op1_node->Opcode() != op1) { 2403 return false; 2404 } 2405 if (_mop_node->outcnt() > 2) { 2406 return false; 2407 } 2408 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2409 if (op1_op2_idx == -1) { 2410 return false; 2411 } 2412 // Memory operation must be the other edge 2413 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2414 2415 // Check that the mop node is really what we want 2416 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2417 Node* op2_node = _op1_node->in(op1_op2_idx); 2418 if (op2_node->outcnt() > 1) { 2419 return false; 2420 } 2421 assert(op2_node->Opcode() == op2, "Should be"); 2422 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2423 if (op2_con_idx == -1) { 2424 return false; 2425 } 2426 // Memory operation must be the other edge 2427 int op2_mop_idx = (op2_con_idx & 1) + 1; 2428 // Check that the memory operation is the same node 2429 if (op2_node->in(op2_mop_idx) == _mop_node) { 2430 // Now check the constant 2431 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2432 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2433 return true; 2434 } 2435 } 2436 } 2437 return false; 2438 } 2439 }; 2440 2441 static bool is_bmi_pattern(Node* n, Node* m) { 2442 assert(UseBMI1Instructions, "sanity"); 2443 if (n != nullptr && m != nullptr) { 2444 if (m->Opcode() == Op_LoadI) { 2445 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2446 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2447 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2448 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2449 } else if (m->Opcode() == Op_LoadL) { 2450 
FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2451 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2452 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2453 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2454 } 2455 } 2456 return false; 2457 } 2458 2459 // Should the matcher clone input 'm' of node 'n'? 2460 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2461 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 2462 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2463 mstack.push(m, Visit); 2464 return true; 2465 } 2466 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2467 mstack.push(m, Visit); // m = ShiftCntV 2468 return true; 2469 } 2470 if (is_encode_and_store_pattern(n, m)) { 2471 mstack.push(m, Visit); 2472 return true; 2473 } 2474 return false; 2475 } 2476 2477 // Should the Matcher clone shifts on addressing modes, expecting them 2478 // to be subsumed into complex addressing expressions or compute them 2479 // into registers? 2480 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2481 Node *off = m->in(AddPNode::Offset); 2482 if (off->is_Con()) { 2483 address_visited.test_set(m->_idx); // Flag as address_visited 2484 Node *adr = m->in(AddPNode::Address); 2485 2486 // Intel can handle 2 adds in addressing mode 2487 // AtomicAdd is not an addressing expression. 2488 // Cheap to find it by looking for screwy base. 2489 if (adr->is_AddP() && 2490 !adr->in(AddPNode::Base)->is_top() && 2491 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2492 // Are there other uses besides address expressions? 2493 !is_visited(adr)) { 2494 address_visited.set(adr->_idx); // Flag as address_visited 2495 Node *shift = adr->in(AddPNode::Offset); 2496 if (!clone_shift(shift, this, mstack, address_visited)) { 2497 mstack.push(shift, Pre_Visit); 2498 } 2499 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2500 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2501 } else { 2502 mstack.push(adr, Pre_Visit); 2503 } 2504 2505 // Clone X+offset as it also folds into most addressing expressions 2506 mstack.push(off, Visit); 2507 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2508 return true; 2509 } else if (clone_shift(off, this, mstack, address_visited)) { 2510 address_visited.test_set(m->_idx); // Flag as address_visited 2511 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2512 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2513 return true; 2514 } 2515 return false; 2516 } 2517 2518 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2519 switch (bt) { 2520 case BoolTest::eq: 2521 return Assembler::eq; 2522 case BoolTest::ne: 2523 return Assembler::neq; 2524 case BoolTest::le: 2525 case BoolTest::ule: 2526 return Assembler::le; 2527 case BoolTest::ge: 2528 case BoolTest::uge: 2529 return Assembler::nlt; 2530 case BoolTest::lt: 2531 case BoolTest::ult: 2532 return Assembler::lt; 2533 case BoolTest::gt: 2534 case BoolTest::ugt: 2535 return Assembler::nle; 2536 default : ShouldNotReachHere(); return Assembler::_false; 2537 } 2538 } 2539 2540 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2541 switch (bt) { 2542 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2543 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
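    // (Illustrative: an ordered predicate such as LT_OQ evaluates to false when
    //  either operand is NaN, whereas the unordered NEQ_UQ evaluates to true,
    //  which is what Java's != requires.)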
2544 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2545 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2546 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2547 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2548 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2549 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2550 } 2551 } 2552 2553 // Helper methods for MachSpillCopyNode::implementation(). 2554 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2555 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2556 assert(ireg == Op_VecS || // 32bit vector 2557 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2558 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2559 "no non-adjacent vector moves" ); 2560 if (masm) { 2561 switch (ireg) { 2562 case Op_VecS: // copy whole register 2563 case Op_VecD: 2564 case Op_VecX: 2565 #ifndef _LP64 2566 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2567 #else 2568 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2569 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2570 } else { 2571 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2572 } 2573 #endif 2574 break; 2575 case Op_VecY: 2576 #ifndef _LP64 2577 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2578 #else 2579 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2580 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2581 } else { 2582 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2583 } 2584 #endif 2585 break; 2586 case Op_VecZ: 2587 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2588 break; 2589 default: 2590 ShouldNotReachHere(); 2591 } 2592 #ifndef PRODUCT 2593 } else { 2594 switch (ireg) { 2595 case Op_VecS: 2596 case Op_VecD: 2597 case Op_VecX: 2598 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2599 break; 2600 case Op_VecY: 2601 case Op_VecZ: 2602 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2603 break; 2604 default: 2605 ShouldNotReachHere(); 2606 } 2607 #endif 2608 } 2609 } 2610 2611 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2612 int stack_offset, int reg, uint ireg, outputStream* st) { 2613 if (masm) { 2614 if (is_load) { 2615 switch (ireg) { 2616 case Op_VecS: 2617 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2618 break; 2619 case Op_VecD: 2620 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2621 break; 2622 case Op_VecX: 2623 #ifndef _LP64 2624 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2625 #else 2626 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2627 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2628 } else { 2629 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2630 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2631 } 2632 
#endif 2633 break; 2634 case Op_VecY: 2635 #ifndef _LP64 2636 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2637 #else 2638 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2639 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2640 } else { 2641 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2642 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2643 } 2644 #endif 2645 break; 2646 case Op_VecZ: 2647 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2648 break; 2649 default: 2650 ShouldNotReachHere(); 2651 } 2652 } else { // store 2653 switch (ireg) { 2654 case Op_VecS: 2655 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2656 break; 2657 case Op_VecD: 2658 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2659 break; 2660 case Op_VecX: 2661 #ifndef _LP64 2662 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2663 #else 2664 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2665 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2666 } 2667 else { 2668 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2669 } 2670 #endif 2671 break; 2672 case Op_VecY: 2673 #ifndef _LP64 2674 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2675 #else 2676 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2677 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2678 } 2679 else { 2680 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2681 } 2682 #endif 2683 break; 2684 case Op_VecZ: 2685 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2686 break; 2687 default: 2688 ShouldNotReachHere(); 2689 } 2690 } 2691 #ifndef PRODUCT 2692 } else { 2693 if (is_load) { 2694 switch (ireg) { 2695 case Op_VecS: 2696 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2697 break; 2698 case Op_VecD: 2699 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2700 break; 2701 case Op_VecX: 2702 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2703 break; 2704 case Op_VecY: 2705 case Op_VecZ: 2706 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2707 break; 2708 default: 2709 ShouldNotReachHere(); 2710 } 2711 } else { // store 2712 switch (ireg) { 2713 case Op_VecS: 2714 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2715 break; 2716 case Op_VecD: 2717 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2718 break; 2719 case Op_VecX: 2720 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2721 break; 2722 case Op_VecY: 2723 case Op_VecZ: 2724 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2725 break; 2726 default: 2727 ShouldNotReachHere(); 2728 } 2729 } 2730 #endif 2731 } 2732 } 2733 2734 template <class T> 2735 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2736 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2737 jvalue ele; 2738 switch (bt) { 2739 
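    // Copy the scalar constant into the jvalue field that matches the element type.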
case T_BYTE: ele.b = con; break; 2740 case T_SHORT: ele.s = con; break; 2741 case T_INT: ele.i = con; break; 2742 case T_LONG: ele.j = con; break; 2743 case T_FLOAT: ele.f = con; break; 2744 case T_DOUBLE: ele.d = con; break; 2745 default: ShouldNotReachHere(); 2746 } 2747 for (int i = 0; i < len; i++) { 2748 val->append(ele); 2749 } 2750 return val; 2751 } 2752 2753 static inline jlong high_bit_set(BasicType bt) { 2754 switch (bt) { 2755 case T_BYTE: return 0x8080808080808080; 2756 case T_SHORT: return 0x8000800080008000; 2757 case T_INT: return 0x8000000080000000; 2758 case T_LONG: return 0x8000000000000000; 2759 default: 2760 ShouldNotReachHere(); 2761 return 0; 2762 } 2763 } 2764 2765 #ifndef PRODUCT 2766 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2767 st->print("nop \t# %d bytes pad for loops and calls", _count); 2768 } 2769 #endif 2770 2771 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2772 __ nop(_count); 2773 } 2774 2775 uint MachNopNode::size(PhaseRegAlloc*) const { 2776 return _count; 2777 } 2778 2779 #ifndef PRODUCT 2780 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2781 st->print("# breakpoint"); 2782 } 2783 #endif 2784 2785 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2786 __ int3(); 2787 } 2788 2789 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2790 return MachNode::size(ra_); 2791 } 2792 2793 %} 2794 2795 encode %{ 2796 2797 enc_class call_epilog %{ 2798 if (VerifyStackAtCalls) { 2799 // Check that stack depth is unchanged: find majik cookie on stack 2800 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2801 Label L; 2802 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2803 __ jccb(Assembler::equal, L); 2804 // Die if stack mismatch 2805 __ int3(); 2806 __ bind(L); 2807 } 2808 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2809 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2810 // Search for the corresponding projection, get the register and emit code that initialized it. 2811 uint con = (tf()->range_cc()->cnt() - 1); 2812 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2813 ProjNode* proj = fast_out(i)->as_Proj(); 2814 if (proj->_con == con) { 2815 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2816 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2817 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2818 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2819 __ testq(rax, rax); 2820 __ setb(Assembler::notZero, toReg); 2821 __ movzbl(toReg, toReg); 2822 if (reg->is_stack()) { 2823 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2824 __ movq(Address(rsp, st_off), toReg); 2825 } 2826 break; 2827 } 2828 } 2829 if (return_value_is_used()) { 2830 // An inline type is returned as fields in multiple registers. 2831 // Rax either contains an oop if the inline type is buffered or a pointer 2832 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2833 // if the lowest bit is set to allow C2 to use the oop after null checking. 
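        // Worked through (illustrative): if the low bit of rax is set (InlineKlass
        // pointer), (rax & 1) - 1 == 0 and the masking below zeroes rax; if the low
        // bit is clear (buffered oop or null), (rax & 1) - 1 == -1 (all bits set)
        // and rax is left unchanged.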
2834 // rax &= (rax & 1) - 1 2835 __ movptr(rscratch1, rax); 2836 __ andptr(rscratch1, 0x1); 2837 __ subptr(rscratch1, 0x1); 2838 __ andptr(rax, rscratch1); 2839 } 2840 } 2841 %} 2842 2843 %} 2844 2845 // Operands for bound floating pointer register arguments 2846 operand rxmm0() %{ 2847 constraint(ALLOC_IN_RC(xmm0_reg)); 2848 match(VecX); 2849 format%{%} 2850 interface(REG_INTER); 2851 %} 2852 2853 //----------OPERANDS----------------------------------------------------------- 2854 // Operand definitions must precede instruction definitions for correct parsing 2855 // in the ADLC because operands constitute user defined types which are used in 2856 // instruction definitions. 2857 2858 // Vectors 2859 2860 // Dummy generic vector class. Should be used for all vector operands. 2861 // Replaced with vec[SDXYZ] during post-selection pass. 2862 operand vec() %{ 2863 constraint(ALLOC_IN_RC(dynamic)); 2864 match(VecX); 2865 match(VecY); 2866 match(VecZ); 2867 match(VecS); 2868 match(VecD); 2869 2870 format %{ %} 2871 interface(REG_INTER); 2872 %} 2873 2874 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2875 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2876 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2877 // runtime code generation via reg_class_dynamic. 2878 operand legVec() %{ 2879 constraint(ALLOC_IN_RC(dynamic)); 2880 match(VecX); 2881 match(VecY); 2882 match(VecZ); 2883 match(VecS); 2884 match(VecD); 2885 2886 format %{ %} 2887 interface(REG_INTER); 2888 %} 2889 2890 // Replaces vec during post-selection cleanup. See above. 2891 operand vecS() %{ 2892 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2893 match(VecS); 2894 2895 format %{ %} 2896 interface(REG_INTER); 2897 %} 2898 2899 // Replaces legVec during post-selection cleanup. See above. 2900 operand legVecS() %{ 2901 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2902 match(VecS); 2903 2904 format %{ %} 2905 interface(REG_INTER); 2906 %} 2907 2908 // Replaces vec during post-selection cleanup. See above. 2909 operand vecD() %{ 2910 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2911 match(VecD); 2912 2913 format %{ %} 2914 interface(REG_INTER); 2915 %} 2916 2917 // Replaces legVec during post-selection cleanup. See above. 2918 operand legVecD() %{ 2919 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2920 match(VecD); 2921 2922 format %{ %} 2923 interface(REG_INTER); 2924 %} 2925 2926 // Replaces vec during post-selection cleanup. See above. 2927 operand vecX() %{ 2928 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2929 match(VecX); 2930 2931 format %{ %} 2932 interface(REG_INTER); 2933 %} 2934 2935 // Replaces legVec during post-selection cleanup. See above. 2936 operand legVecX() %{ 2937 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2938 match(VecX); 2939 2940 format %{ %} 2941 interface(REG_INTER); 2942 %} 2943 2944 // Replaces vec during post-selection cleanup. See above. 2945 operand vecY() %{ 2946 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2947 match(VecY); 2948 2949 format %{ %} 2950 interface(REG_INTER); 2951 %} 2952 2953 // Replaces legVec during post-selection cleanup. See above. 2954 operand legVecY() %{ 2955 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2956 match(VecY); 2957 2958 format %{ %} 2959 interface(REG_INTER); 2960 %} 2961 2962 // Replaces vec during post-selection cleanup. See above. 
2963 operand vecZ() %{ 2964 constraint(ALLOC_IN_RC(vectorz_reg)); 2965 match(VecZ); 2966 2967 format %{ %} 2968 interface(REG_INTER); 2969 %} 2970 2971 // Replaces legVec during post-selection cleanup. See above. 2972 operand legVecZ() %{ 2973 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2974 match(VecZ); 2975 2976 format %{ %} 2977 interface(REG_INTER); 2978 %} 2979 2980 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2981 2982 // ============================================================================ 2983 2984 instruct ShouldNotReachHere() %{ 2985 match(Halt); 2986 format %{ "stop\t# ShouldNotReachHere" %} 2987 ins_encode %{ 2988 if (is_reachable()) { 2989 __ stop(_halt_reason); 2990 } 2991 %} 2992 ins_pipe(pipe_slow); 2993 %} 2994 2995 // ============================================================================ 2996 2997 instruct addF_reg(regF dst, regF src) %{ 2998 predicate((UseSSE>=1) && (UseAVX == 0)); 2999 match(Set dst (AddF dst src)); 3000 3001 format %{ "addss $dst, $src" %} 3002 ins_cost(150); 3003 ins_encode %{ 3004 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3005 %} 3006 ins_pipe(pipe_slow); 3007 %} 3008 3009 instruct addF_mem(regF dst, memory src) %{ 3010 predicate((UseSSE>=1) && (UseAVX == 0)); 3011 match(Set dst (AddF dst (LoadF src))); 3012 3013 format %{ "addss $dst, $src" %} 3014 ins_cost(150); 3015 ins_encode %{ 3016 __ addss($dst$$XMMRegister, $src$$Address); 3017 %} 3018 ins_pipe(pipe_slow); 3019 %} 3020 3021 instruct addF_imm(regF dst, immF con) %{ 3022 predicate((UseSSE>=1) && (UseAVX == 0)); 3023 match(Set dst (AddF dst con)); 3024 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ addss($dst$$XMMRegister, $constantaddress($con)); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3033 predicate(UseAVX > 0); 3034 match(Set dst (AddF src1 src2)); 3035 3036 format %{ "vaddss $dst, $src1, $src2" %} 3037 ins_cost(150); 3038 ins_encode %{ 3039 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3040 %} 3041 ins_pipe(pipe_slow); 3042 %} 3043 3044 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3045 predicate(UseAVX > 0); 3046 match(Set dst (AddF src1 (LoadF src2))); 3047 3048 format %{ "vaddss $dst, $src1, $src2" %} 3049 ins_cost(150); 3050 ins_encode %{ 3051 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3052 %} 3053 ins_pipe(pipe_slow); 3054 %} 3055 3056 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3057 predicate(UseAVX > 0); 3058 match(Set dst (AddF src con)); 3059 3060 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3061 ins_cost(150); 3062 ins_encode %{ 3063 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3064 %} 3065 ins_pipe(pipe_slow); 3066 %} 3067 3068 instruct addD_reg(regD dst, regD src) %{ 3069 predicate((UseSSE>=2) && (UseAVX == 0)); 3070 match(Set dst (AddD dst src)); 3071 3072 format %{ "addsd $dst, $src" %} 3073 ins_cost(150); 3074 ins_encode %{ 3075 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3076 %} 3077 ins_pipe(pipe_slow); 3078 %} 3079 3080 instruct addD_mem(regD dst, memory src) %{ 3081 predicate((UseSSE>=2) && (UseAVX == 0)); 3082 match(Set dst (AddD dst (LoadD src))); 3083 3084 format %{ "addsd $dst, $src" %} 3085 ins_cost(150); 3086 ins_encode %{ 3087 __ addsd($dst$$XMMRegister, $src$$Address); 3088 %} 3089 ins_pipe(pipe_slow); 3090 %} 
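// Note: the (UseAVX == 0) rules in this section use the destructive two-operand SSE scalar
// encodings (e.g. addsd computes dst = dst + src), while the (UseAVX > 0) rules use the
// non-destructive three-operand VEX encodings (e.g. vaddsd dst, src1, src2), so dst does not
// have to match src1.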
3091 3092 instruct addD_imm(regD dst, immD con) %{ 3093 predicate((UseSSE>=2) && (UseAVX == 0)); 3094 match(Set dst (AddD dst con)); 3095 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3096 ins_cost(150); 3097 ins_encode %{ 3098 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3099 %} 3100 ins_pipe(pipe_slow); 3101 %} 3102 3103 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3104 predicate(UseAVX > 0); 3105 match(Set dst (AddD src1 src2)); 3106 3107 format %{ "vaddsd $dst, $src1, $src2" %} 3108 ins_cost(150); 3109 ins_encode %{ 3110 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3111 %} 3112 ins_pipe(pipe_slow); 3113 %} 3114 3115 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3116 predicate(UseAVX > 0); 3117 match(Set dst (AddD src1 (LoadD src2))); 3118 3119 format %{ "vaddsd $dst, $src1, $src2" %} 3120 ins_cost(150); 3121 ins_encode %{ 3122 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3123 %} 3124 ins_pipe(pipe_slow); 3125 %} 3126 3127 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3128 predicate(UseAVX > 0); 3129 match(Set dst (AddD src con)); 3130 3131 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3132 ins_cost(150); 3133 ins_encode %{ 3134 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3135 %} 3136 ins_pipe(pipe_slow); 3137 %} 3138 3139 instruct subF_reg(regF dst, regF src) %{ 3140 predicate((UseSSE>=1) && (UseAVX == 0)); 3141 match(Set dst (SubF dst src)); 3142 3143 format %{ "subss $dst, $src" %} 3144 ins_cost(150); 3145 ins_encode %{ 3146 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3147 %} 3148 ins_pipe(pipe_slow); 3149 %} 3150 3151 instruct subF_mem(regF dst, memory src) %{ 3152 predicate((UseSSE>=1) && (UseAVX == 0)); 3153 match(Set dst (SubF dst (LoadF src))); 3154 3155 format %{ "subss $dst, $src" %} 3156 ins_cost(150); 3157 ins_encode %{ 3158 __ subss($dst$$XMMRegister, $src$$Address); 3159 %} 3160 ins_pipe(pipe_slow); 3161 %} 3162 3163 instruct subF_imm(regF dst, immF con) %{ 3164 predicate((UseSSE>=1) && (UseAVX == 0)); 3165 match(Set dst (SubF dst con)); 3166 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3167 ins_cost(150); 3168 ins_encode %{ 3169 __ subss($dst$$XMMRegister, $constantaddress($con)); 3170 %} 3171 ins_pipe(pipe_slow); 3172 %} 3173 3174 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3175 predicate(UseAVX > 0); 3176 match(Set dst (SubF src1 src2)); 3177 3178 format %{ "vsubss $dst, $src1, $src2" %} 3179 ins_cost(150); 3180 ins_encode %{ 3181 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3182 %} 3183 ins_pipe(pipe_slow); 3184 %} 3185 3186 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3187 predicate(UseAVX > 0); 3188 match(Set dst (SubF src1 (LoadF src2))); 3189 3190 format %{ "vsubss $dst, $src1, $src2" %} 3191 ins_cost(150); 3192 ins_encode %{ 3193 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3194 %} 3195 ins_pipe(pipe_slow); 3196 %} 3197 3198 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3199 predicate(UseAVX > 0); 3200 match(Set dst (SubF src con)); 3201 3202 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3203 ins_cost(150); 3204 ins_encode %{ 3205 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3206 %} 3207 ins_pipe(pipe_slow); 3208 %} 3209 3210 instruct subD_reg(regD dst, regD src) 
%{ 3211 predicate((UseSSE>=2) && (UseAVX == 0)); 3212 match(Set dst (SubD dst src)); 3213 3214 format %{ "subsd $dst, $src" %} 3215 ins_cost(150); 3216 ins_encode %{ 3217 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3218 %} 3219 ins_pipe(pipe_slow); 3220 %} 3221 3222 instruct subD_mem(regD dst, memory src) %{ 3223 predicate((UseSSE>=2) && (UseAVX == 0)); 3224 match(Set dst (SubD dst (LoadD src))); 3225 3226 format %{ "subsd $dst, $src" %} 3227 ins_cost(150); 3228 ins_encode %{ 3229 __ subsd($dst$$XMMRegister, $src$$Address); 3230 %} 3231 ins_pipe(pipe_slow); 3232 %} 3233 3234 instruct subD_imm(regD dst, immD con) %{ 3235 predicate((UseSSE>=2) && (UseAVX == 0)); 3236 match(Set dst (SubD dst con)); 3237 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3238 ins_cost(150); 3239 ins_encode %{ 3240 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3241 %} 3242 ins_pipe(pipe_slow); 3243 %} 3244 3245 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3246 predicate(UseAVX > 0); 3247 match(Set dst (SubD src1 src2)); 3248 3249 format %{ "vsubsd $dst, $src1, $src2" %} 3250 ins_cost(150); 3251 ins_encode %{ 3252 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3253 %} 3254 ins_pipe(pipe_slow); 3255 %} 3256 3257 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3258 predicate(UseAVX > 0); 3259 match(Set dst (SubD src1 (LoadD src2))); 3260 3261 format %{ "vsubsd $dst, $src1, $src2" %} 3262 ins_cost(150); 3263 ins_encode %{ 3264 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3265 %} 3266 ins_pipe(pipe_slow); 3267 %} 3268 3269 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3270 predicate(UseAVX > 0); 3271 match(Set dst (SubD src con)); 3272 3273 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3274 ins_cost(150); 3275 ins_encode %{ 3276 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3277 %} 3278 ins_pipe(pipe_slow); 3279 %} 3280 3281 instruct mulF_reg(regF dst, regF src) %{ 3282 predicate((UseSSE>=1) && (UseAVX == 0)); 3283 match(Set dst (MulF dst src)); 3284 3285 format %{ "mulss $dst, $src" %} 3286 ins_cost(150); 3287 ins_encode %{ 3288 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3289 %} 3290 ins_pipe(pipe_slow); 3291 %} 3292 3293 instruct mulF_mem(regF dst, memory src) %{ 3294 predicate((UseSSE>=1) && (UseAVX == 0)); 3295 match(Set dst (MulF dst (LoadF src))); 3296 3297 format %{ "mulss $dst, $src" %} 3298 ins_cost(150); 3299 ins_encode %{ 3300 __ mulss($dst$$XMMRegister, $src$$Address); 3301 %} 3302 ins_pipe(pipe_slow); 3303 %} 3304 3305 instruct mulF_imm(regF dst, immF con) %{ 3306 predicate((UseSSE>=1) && (UseAVX == 0)); 3307 match(Set dst (MulF dst con)); 3308 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3309 ins_cost(150); 3310 ins_encode %{ 3311 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3312 %} 3313 ins_pipe(pipe_slow); 3314 %} 3315 3316 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3317 predicate(UseAVX > 0); 3318 match(Set dst (MulF src1 src2)); 3319 3320 format %{ "vmulss $dst, $src1, $src2" %} 3321 ins_cost(150); 3322 ins_encode %{ 3323 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3324 %} 3325 ins_pipe(pipe_slow); 3326 %} 3327 3328 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3329 predicate(UseAVX > 0); 3330 match(Set dst (MulF src1 (LoadF src2))); 3331 3332 format %{ "vmulss $dst, $src1, $src2" %} 3333 
ins_cost(150); 3334 ins_encode %{ 3335 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3336 %} 3337 ins_pipe(pipe_slow); 3338 %} 3339 3340 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3341 predicate(UseAVX > 0); 3342 match(Set dst (MulF src con)); 3343 3344 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3345 ins_cost(150); 3346 ins_encode %{ 3347 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3348 %} 3349 ins_pipe(pipe_slow); 3350 %} 3351 3352 instruct mulD_reg(regD dst, regD src) %{ 3353 predicate((UseSSE>=2) && (UseAVX == 0)); 3354 match(Set dst (MulD dst src)); 3355 3356 format %{ "mulsd $dst, $src" %} 3357 ins_cost(150); 3358 ins_encode %{ 3359 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3360 %} 3361 ins_pipe(pipe_slow); 3362 %} 3363 3364 instruct mulD_mem(regD dst, memory src) %{ 3365 predicate((UseSSE>=2) && (UseAVX == 0)); 3366 match(Set dst (MulD dst (LoadD src))); 3367 3368 format %{ "mulsd $dst, $src" %} 3369 ins_cost(150); 3370 ins_encode %{ 3371 __ mulsd($dst$$XMMRegister, $src$$Address); 3372 %} 3373 ins_pipe(pipe_slow); 3374 %} 3375 3376 instruct mulD_imm(regD dst, immD con) %{ 3377 predicate((UseSSE>=2) && (UseAVX == 0)); 3378 match(Set dst (MulD dst con)); 3379 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3380 ins_cost(150); 3381 ins_encode %{ 3382 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3383 %} 3384 ins_pipe(pipe_slow); 3385 %} 3386 3387 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3388 predicate(UseAVX > 0); 3389 match(Set dst (MulD src1 src2)); 3390 3391 format %{ "vmulsd $dst, $src1, $src2" %} 3392 ins_cost(150); 3393 ins_encode %{ 3394 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3395 %} 3396 ins_pipe(pipe_slow); 3397 %} 3398 3399 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3400 predicate(UseAVX > 0); 3401 match(Set dst (MulD src1 (LoadD src2))); 3402 3403 format %{ "vmulsd $dst, $src1, $src2" %} 3404 ins_cost(150); 3405 ins_encode %{ 3406 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3407 %} 3408 ins_pipe(pipe_slow); 3409 %} 3410 3411 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3412 predicate(UseAVX > 0); 3413 match(Set dst (MulD src con)); 3414 3415 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3416 ins_cost(150); 3417 ins_encode %{ 3418 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3419 %} 3420 ins_pipe(pipe_slow); 3421 %} 3422 3423 instruct divF_reg(regF dst, regF src) %{ 3424 predicate((UseSSE>=1) && (UseAVX == 0)); 3425 match(Set dst (DivF dst src)); 3426 3427 format %{ "divss $dst, $src" %} 3428 ins_cost(150); 3429 ins_encode %{ 3430 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3431 %} 3432 ins_pipe(pipe_slow); 3433 %} 3434 3435 instruct divF_mem(regF dst, memory src) %{ 3436 predicate((UseSSE>=1) && (UseAVX == 0)); 3437 match(Set dst (DivF dst (LoadF src))); 3438 3439 format %{ "divss $dst, $src" %} 3440 ins_cost(150); 3441 ins_encode %{ 3442 __ divss($dst$$XMMRegister, $src$$Address); 3443 %} 3444 ins_pipe(pipe_slow); 3445 %} 3446 3447 instruct divF_imm(regF dst, immF con) %{ 3448 predicate((UseSSE>=1) && (UseAVX == 0)); 3449 match(Set dst (DivF dst con)); 3450 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3451 ins_cost(150); 3452 ins_encode %{ 3453 __ divss($dst$$XMMRegister, $constantaddress($con)); 3454 
%} 3455 ins_pipe(pipe_slow); 3456 %} 3457 3458 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3459 predicate(UseAVX > 0); 3460 match(Set dst (DivF src1 src2)); 3461 3462 format %{ "vdivss $dst, $src1, $src2" %} 3463 ins_cost(150); 3464 ins_encode %{ 3465 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3466 %} 3467 ins_pipe(pipe_slow); 3468 %} 3469 3470 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3471 predicate(UseAVX > 0); 3472 match(Set dst (DivF src1 (LoadF src2))); 3473 3474 format %{ "vdivss $dst, $src1, $src2" %} 3475 ins_cost(150); 3476 ins_encode %{ 3477 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3478 %} 3479 ins_pipe(pipe_slow); 3480 %} 3481 3482 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3483 predicate(UseAVX > 0); 3484 match(Set dst (DivF src con)); 3485 3486 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3487 ins_cost(150); 3488 ins_encode %{ 3489 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3490 %} 3491 ins_pipe(pipe_slow); 3492 %} 3493 3494 instruct divD_reg(regD dst, regD src) %{ 3495 predicate((UseSSE>=2) && (UseAVX == 0)); 3496 match(Set dst (DivD dst src)); 3497 3498 format %{ "divsd $dst, $src" %} 3499 ins_cost(150); 3500 ins_encode %{ 3501 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3502 %} 3503 ins_pipe(pipe_slow); 3504 %} 3505 3506 instruct divD_mem(regD dst, memory src) %{ 3507 predicate((UseSSE>=2) && (UseAVX == 0)); 3508 match(Set dst (DivD dst (LoadD src))); 3509 3510 format %{ "divsd $dst, $src" %} 3511 ins_cost(150); 3512 ins_encode %{ 3513 __ divsd($dst$$XMMRegister, $src$$Address); 3514 %} 3515 ins_pipe(pipe_slow); 3516 %} 3517 3518 instruct divD_imm(regD dst, immD con) %{ 3519 predicate((UseSSE>=2) && (UseAVX == 0)); 3520 match(Set dst (DivD dst con)); 3521 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3522 ins_cost(150); 3523 ins_encode %{ 3524 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3525 %} 3526 ins_pipe(pipe_slow); 3527 %} 3528 3529 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3530 predicate(UseAVX > 0); 3531 match(Set dst (DivD src1 src2)); 3532 3533 format %{ "vdivsd $dst, $src1, $src2" %} 3534 ins_cost(150); 3535 ins_encode %{ 3536 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3537 %} 3538 ins_pipe(pipe_slow); 3539 %} 3540 3541 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3542 predicate(UseAVX > 0); 3543 match(Set dst (DivD src1 (LoadD src2))); 3544 3545 format %{ "vdivsd $dst, $src1, $src2" %} 3546 ins_cost(150); 3547 ins_encode %{ 3548 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3549 %} 3550 ins_pipe(pipe_slow); 3551 %} 3552 3553 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3554 predicate(UseAVX > 0); 3555 match(Set dst (DivD src con)); 3556 3557 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3558 ins_cost(150); 3559 ins_encode %{ 3560 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3561 %} 3562 ins_pipe(pipe_slow); 3563 %} 3564 3565 instruct absF_reg(regF dst) %{ 3566 predicate((UseSSE>=1) && (UseAVX == 0)); 3567 match(Set dst (AbsF dst)); 3568 ins_cost(150); 3569 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3570 ins_encode %{ 3571 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3572 %} 3573 ins_pipe(pipe_slow); 3574 %} 3575 3576 instruct 
absF_reg_reg(vlRegF dst, vlRegF src) %{ 3577 predicate(UseAVX > 0); 3578 match(Set dst (AbsF src)); 3579 ins_cost(150); 3580 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3581 ins_encode %{ 3582 int vlen_enc = Assembler::AVX_128bit; 3583 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3584 ExternalAddress(float_signmask()), vlen_enc); 3585 %} 3586 ins_pipe(pipe_slow); 3587 %} 3588 3589 instruct absD_reg(regD dst) %{ 3590 predicate((UseSSE>=2) && (UseAVX == 0)); 3591 match(Set dst (AbsD dst)); 3592 ins_cost(150); 3593 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3594 "# abs double by sign masking" %} 3595 ins_encode %{ 3596 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3597 %} 3598 ins_pipe(pipe_slow); 3599 %} 3600 3601 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3602 predicate(UseAVX > 0); 3603 match(Set dst (AbsD src)); 3604 ins_cost(150); 3605 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3606 "# abs double by sign masking" %} 3607 ins_encode %{ 3608 int vlen_enc = Assembler::AVX_128bit; 3609 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3610 ExternalAddress(double_signmask()), vlen_enc); 3611 %} 3612 ins_pipe(pipe_slow); 3613 %} 3614 3615 instruct negF_reg(regF dst) %{ 3616 predicate((UseSSE>=1) && (UseAVX == 0)); 3617 match(Set dst (NegF dst)); 3618 ins_cost(150); 3619 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3620 ins_encode %{ 3621 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3622 %} 3623 ins_pipe(pipe_slow); 3624 %} 3625 3626 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3627 predicate(UseAVX > 0); 3628 match(Set dst (NegF src)); 3629 ins_cost(150); 3630 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3631 ins_encode %{ 3632 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3633 ExternalAddress(float_signflip())); 3634 %} 3635 ins_pipe(pipe_slow); 3636 %} 3637 3638 instruct negD_reg(regD dst) %{ 3639 predicate((UseSSE>=2) && (UseAVX == 0)); 3640 match(Set dst (NegD dst)); 3641 ins_cost(150); 3642 format %{ "xorpd $dst, [0x8000000000000000]\t" 3643 "# neg double by sign flipping" %} 3644 ins_encode %{ 3645 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3646 %} 3647 ins_pipe(pipe_slow); 3648 %} 3649 3650 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3651 predicate(UseAVX > 0); 3652 match(Set dst (NegD src)); 3653 ins_cost(150); 3654 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3655 "# neg double by sign flipping" %} 3656 ins_encode %{ 3657 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3658 ExternalAddress(double_signflip())); 3659 %} 3660 ins_pipe(pipe_slow); 3661 %} 3662 3663 // sqrtss instruction needs destination register to be pre initialized for best performance 3664 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3665 instruct sqrtF_reg(regF dst) %{ 3666 predicate(UseSSE>=1); 3667 match(Set dst (SqrtF dst)); 3668 format %{ "sqrtss $dst, $dst" %} 3669 ins_encode %{ 3670 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3671 %} 3672 ins_pipe(pipe_slow); 3673 %} 3674 3675 // sqrtsd instruction needs destination register to be pre initialized for best performance 3676 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3677 instruct sqrtD_reg(regD dst) %{ 3678 predicate(UseSSE>=2); 3679 match(Set dst (SqrtD dst)); 3680 format %{ "sqrtsd $dst, $dst" %} 3681 ins_encode %{ 3682 __ 
sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3683 %} 3684 ins_pipe(pipe_slow); 3685 %} 3686 3687 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3688 effect(TEMP tmp); 3689 match(Set dst (ConvF2HF src)); 3690 ins_cost(125); 3691 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3692 ins_encode %{ 3693 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3694 %} 3695 ins_pipe( pipe_slow ); 3696 %} 3697 3698 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3699 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3700 effect(TEMP ktmp, TEMP rtmp); 3701 match(Set mem (StoreC mem (ConvF2HF src))); 3702 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3703 ins_encode %{ 3704 __ movl($rtmp$$Register, 0x1); 3705 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3706 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3707 %} 3708 ins_pipe( pipe_slow ); 3709 %} 3710 3711 instruct vconvF2HF(vec dst, vec src) %{ 3712 match(Set dst (VectorCastF2HF src)); 3713 format %{ "vector_conv_F2HF $dst $src" %} 3714 ins_encode %{ 3715 int vlen_enc = vector_length_encoding(this, $src); 3716 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3717 %} 3718 ins_pipe( pipe_slow ); 3719 %} 3720 3721 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3722 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3723 format %{ "vcvtps2ph $mem,$src" %} 3724 ins_encode %{ 3725 int vlen_enc = vector_length_encoding(this, $src); 3726 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3727 %} 3728 ins_pipe( pipe_slow ); 3729 %} 3730 3731 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3732 match(Set dst (ConvHF2F src)); 3733 format %{ "vcvtph2ps $dst,$src" %} 3734 ins_encode %{ 3735 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3736 %} 3737 ins_pipe( pipe_slow ); 3738 %} 3739 3740 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3741 match(Set dst (VectorCastHF2F (LoadVector mem))); 3742 format %{ "vcvtph2ps $dst,$mem" %} 3743 ins_encode %{ 3744 int vlen_enc = vector_length_encoding(this); 3745 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3746 %} 3747 ins_pipe( pipe_slow ); 3748 %} 3749 3750 instruct vconvHF2F(vec dst, vec src) %{ 3751 match(Set dst (VectorCastHF2F src)); 3752 ins_cost(125); 3753 format %{ "vector_conv_HF2F $dst,$src" %} 3754 ins_encode %{ 3755 int vlen_enc = vector_length_encoding(this); 3756 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 // ---------------------------------------- VectorReinterpret ------------------------------------ 3762 instruct reinterpret_mask(kReg dst) %{ 3763 predicate(n->bottom_type()->isa_vectmask() && 3764 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3765 match(Set dst (VectorReinterpret dst)); 3766 ins_cost(125); 3767 format %{ "vector_reinterpret $dst\t!" 
%} 3768 ins_encode %{ 3769 // empty 3770 %} 3771 ins_pipe( pipe_slow ); 3772 %} 3773 3774 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3775 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3776 n->bottom_type()->isa_vectmask() && 3777 n->in(1)->bottom_type()->isa_vectmask() && 3778 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3779 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3780 match(Set dst (VectorReinterpret src)); 3781 effect(TEMP xtmp); 3782 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3783 ins_encode %{ 3784 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3785 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3786 assert(src_sz == dst_sz , "src and dst size mismatch"); 3787 int vlen_enc = vector_length_encoding(src_sz); 3788 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3789 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3790 %} 3791 ins_pipe( pipe_slow ); 3792 %} 3793 3794 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3795 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3796 n->bottom_type()->isa_vectmask() && 3797 n->in(1)->bottom_type()->isa_vectmask() && 3798 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3799 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3800 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3801 match(Set dst (VectorReinterpret src)); 3802 effect(TEMP xtmp); 3803 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3804 ins_encode %{ 3805 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3806 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3807 assert(src_sz == dst_sz , "src and dst size mismatch"); 3808 int vlen_enc = vector_length_encoding(src_sz); 3809 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3810 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3811 %} 3812 ins_pipe( pipe_slow ); 3813 %} 3814 3815 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3816 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3817 n->bottom_type()->isa_vectmask() && 3818 n->in(1)->bottom_type()->isa_vectmask() && 3819 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3820 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3821 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3822 match(Set dst (VectorReinterpret src)); 3823 effect(TEMP xtmp); 3824 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3825 ins_encode %{ 3826 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3827 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3828 assert(src_sz == dst_sz , "src and dst size mismatch"); 3829 int vlen_enc = vector_length_encoding(src_sz); 3830 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3831 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3832 %} 3833 ins_pipe( pipe_slow ); 3834 %} 3835 3836 instruct reinterpret(vec dst) %{ 3837 predicate(!n->bottom_type()->isa_vectmask() && 3838 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3839 match(Set dst (VectorReinterpret dst)); 3840 ins_cost(125); 3841 format %{ "vector_reinterpret $dst\t!" %} 3842 ins_encode %{ 3843 // empty 3844 %} 3845 ins_pipe( pipe_slow ); 3846 %} 3847 3848 instruct reinterpret_expand(vec dst, vec src) %{ 3849 predicate(UseAVX == 0 && 3850 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3851 match(Set dst (VectorReinterpret src)); 3852 ins_cost(125); 3853 effect(TEMP dst); 3854 format %{ "vector_reinterpret_expand $dst,$src" %} 3855 ins_encode %{ 3856 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3857 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3858 3859 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3860 if (src_vlen_in_bytes == 4) { 3861 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3862 } else { 3863 assert(src_vlen_in_bytes == 8, ""); 3864 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3865 } 3866 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3872 predicate(UseAVX > 0 && 3873 !n->bottom_type()->isa_vectmask() && 3874 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3875 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3876 match(Set dst (VectorReinterpret src)); 3877 ins_cost(125); 3878 format %{ "vector_reinterpret_expand $dst,$src" %} 3879 ins_encode %{ 3880 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3881 %} 3882 ins_pipe( pipe_slow ); 3883 %} 3884 3885 3886 instruct vreinterpret_expand(legVec dst, vec src) %{ 3887 predicate(UseAVX > 0 && 3888 !n->bottom_type()->isa_vectmask() && 3889 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3890 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3891 match(Set dst (VectorReinterpret src)); 3892 ins_cost(125); 3893 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3894 ins_encode %{ 3895 switch (Matcher::vector_length_in_bytes(this, $src)) { 3896 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3897 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3898 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3899 default: ShouldNotReachHere(); 3900 } 3901 %} 3902 ins_pipe( pipe_slow ); 3903 %} 3904 3905 instruct reinterpret_shrink(vec dst, legVec src) %{ 3906 predicate(!n->bottom_type()->isa_vectmask() && 3907 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3908 match(Set dst (VectorReinterpret src)); 3909 ins_cost(125); 3910 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3911 ins_encode %{ 3912 switch (Matcher::vector_length_in_bytes(this)) { 3913 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3914 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3915 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3916 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3917 default: ShouldNotReachHere(); 3918 } 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 // ---------------------------------------------------------------------------------------------------- 3924 3925 #ifdef _LP64 3926 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3927 match(Set dst (RoundDoubleMode src rmode)); 3928 format %{ "roundsd $dst,$src" %} 3929 ins_cost(150); 3930 ins_encode %{ 3931 assert(UseSSE >= 4, "required"); 3932 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3933 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3934 } 3935 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3936 %} 3937 ins_pipe(pipe_slow); 3938 %} 3939 3940 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3941 match(Set dst (RoundDoubleMode con rmode)); 3942 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3943 ins_cost(150); 3944 ins_encode %{ 3945 assert(UseSSE >= 4, "required"); 3946 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3947 %} 3948 ins_pipe(pipe_slow); 3949 %} 3950 3951 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3952 predicate(Matcher::vector_length(n) < 8); 3953 match(Set dst (RoundDoubleModeV src rmode)); 3954 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3955 ins_encode %{ 3956 assert(UseAVX > 0, "required"); 3957 int vlen_enc = vector_length_encoding(this); 3958 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3959 %} 3960 ins_pipe( pipe_slow ); 3961 %} 3962 3963 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3964 predicate(Matcher::vector_length(n) == 8); 3965 match(Set dst (RoundDoubleModeV src rmode)); 3966 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3967 ins_encode %{ 3968 assert(UseAVX > 2, "required"); 3969 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3970 %} 3971 ins_pipe( pipe_slow ); 3972 %} 3973 3974 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3975 predicate(Matcher::vector_length(n) < 8); 3976 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3977 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3978 ins_encode %{ 3979 assert(UseAVX > 0, "required"); 3980 int vlen_enc = vector_length_encoding(this); 3981 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3982 %} 3983 ins_pipe( pipe_slow ); 3984 %} 3985 3986 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3987 predicate(Matcher::vector_length(n) == 8); 3988 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3989 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3990 ins_encode %{ 3991 assert(UseAVX > 2, "required"); 3992 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3993 %} 3994 ins_pipe( pipe_slow ); 3995 %} 3996 #endif // _LP64 3997 3998 instruct onspinwait() %{ 3999 match(OnSpinWait); 4000 ins_cost(200); 4001 4002 format %{ 4003 $$template 4004 $$emit$$"pause\t! 
membar_onspinwait" 4005 %} 4006 ins_encode %{ 4007 __ pause(); 4008 %} 4009 ins_pipe(pipe_slow); 4010 %} 4011 4012 // a * b + c 4013 instruct fmaD_reg(regD a, regD b, regD c) %{ 4014 match(Set c (FmaD c (Binary a b))); 4015 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4016 ins_cost(150); 4017 ins_encode %{ 4018 assert(UseFMA, "Needs FMA instructions support."); 4019 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4020 %} 4021 ins_pipe( pipe_slow ); 4022 %} 4023 4024 // a * b + c 4025 instruct fmaF_reg(regF a, regF b, regF c) %{ 4026 match(Set c (FmaF c (Binary a b))); 4027 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4028 ins_cost(150); 4029 ins_encode %{ 4030 assert(UseFMA, "Needs FMA instructions support."); 4031 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4032 %} 4033 ins_pipe( pipe_slow ); 4034 %} 4035 4036 // ====================VECTOR INSTRUCTIONS===================================== 4037 4038 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4039 instruct MoveVec2Leg(legVec dst, vec src) %{ 4040 match(Set dst src); 4041 format %{ "" %} 4042 ins_encode %{ 4043 ShouldNotReachHere(); 4044 %} 4045 ins_pipe( fpu_reg_reg ); 4046 %} 4047 4048 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4049 match(Set dst src); 4050 format %{ "" %} 4051 ins_encode %{ 4052 ShouldNotReachHere(); 4053 %} 4054 ins_pipe( fpu_reg_reg ); 4055 %} 4056 4057 // ============================================================================ 4058 4059 // Load vectors generic operand pattern 4060 instruct loadV(vec dst, memory mem) %{ 4061 match(Set dst (LoadVector mem)); 4062 ins_cost(125); 4063 format %{ "load_vector $dst,$mem" %} 4064 ins_encode %{ 4065 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4066 %} 4067 ins_pipe( pipe_slow ); 4068 %} 4069 4070 // Store vectors generic operand pattern. 4071 instruct storeV(memory mem, vec src) %{ 4072 match(Set mem (StoreVector mem src)); 4073 ins_cost(145); 4074 format %{ "store_vector $mem,$src\n\t" %} 4075 ins_encode %{ 4076 switch (Matcher::vector_length_in_bytes(this, $src)) { 4077 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4078 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4079 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4080 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4081 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4082 default: ShouldNotReachHere(); 4083 } 4084 %} 4085 ins_pipe( pipe_slow ); 4086 %} 4087 4088 // ---------------------------------------- Gather ------------------------------------ 4089 4090 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4091 4092 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4093 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4094 Matcher::vector_length_in_bytes(n) <= 32); 4095 match(Set dst (LoadVectorGather mem idx)); 4096 effect(TEMP dst, TEMP tmp, TEMP mask); 4097 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4098 ins_encode %{ 4099 int vlen_enc = vector_length_encoding(this); 4100 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4101 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4102 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4103 __ lea($tmp$$Register, $mem$$Address); 4104 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4105 %} 4106 ins_pipe( pipe_slow ); 4107 %} 4108 4109 4110 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4111 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4112 !is_subword_type(Matcher::vector_element_basic_type(n))); 4113 match(Set dst (LoadVectorGather mem idx)); 4114 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4115 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %} 4116 ins_encode %{ 4117 int vlen_enc = vector_length_encoding(this); 4118 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4119 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4120 __ lea($tmp$$Register, $mem$$Address); 4121 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4122 %} 4123 ins_pipe( pipe_slow ); 4124 %} 4125 4126 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4127 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4128 !is_subword_type(Matcher::vector_element_basic_type(n))); 4129 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4130 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4131 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %} 4132 ins_encode %{ 4133 assert(UseAVX > 2, "sanity"); 4134 int vlen_enc = vector_length_encoding(this); 4135 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4136 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4137 // Note: Since the gather instruction partially updates the opmask register used 4138 // for predication, the mask operand is moved to a temporary. 4139 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4140 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4141 __ lea($tmp$$Register, $mem$$Address); 4142 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4143 %} 4144 ins_pipe( pipe_slow ); 4145 %} 4146 4147 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4148 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4149 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4150 effect(TEMP tmp, TEMP rtmp); 4151 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4152 ins_encode %{ 4153 int vlen_enc = vector_length_encoding(this); 4154 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4155 __ lea($tmp$$Register, $mem$$Address); 4156 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4157 %} 4158 ins_pipe( pipe_slow ); 4159 %} 4160 4161 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4162 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4163 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4164 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4165 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4166 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4167 ins_encode %{ 4168 int vlen_enc = vector_length_encoding(this); 4169 int vector_len = Matcher::vector_length(this); 4170 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4171 __ lea($tmp$$Register, $mem$$Address); 4172 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4173 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4174 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4180 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4181 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4182 effect(TEMP tmp, TEMP rtmp, KILL cr); 4183 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4184 ins_encode %{ 4185 int vlen_enc = vector_length_encoding(this); 4186 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4187 __ lea($tmp$$Register, $mem$$Address); 4188 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4189 %} 4190 ins_pipe( pipe_slow ); 4191 %} 4192 4193 4194 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4195 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4196 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4197 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4198 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4199 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4200 ins_encode %{ 4201 int vlen_enc = vector_length_encoding(this); 4202 int vector_len = Matcher::vector_length(this); 4203 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4204 __ lea($tmp$$Register, $mem$$Address); 4205 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4206 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4207 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4208 %} 4209 ins_pipe( pipe_slow ); 4210 %} 4211 4212 4213 #ifdef _LP64 4214 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4215 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4216 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4217 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4218 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4219 ins_encode %{ 4220 int vlen_enc = vector_length_encoding(this); 4221 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4222 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4223 __ lea($tmp$$Register, $mem$$Address); 4224 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4225 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4226 %} 4227 ins_pipe( pipe_slow ); 4228 %} 4229 4230 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4231 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4232 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4233 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4234 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4235 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4236 ins_encode %{ 4237 int vlen_enc = vector_length_encoding(this); 4238 int vector_len = Matcher::vector_length(this); 4239 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4240 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4241 __ lea($tmp$$Register, $mem$$Address); 4242 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4243 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4244 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4245 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4246 %} 4247 ins_pipe( pipe_slow ); 4248 %} 4249 4250 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4251 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4252 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4253 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4254 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4255 ins_encode %{ 4256 int vlen_enc = vector_length_encoding(this); 4257 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4258 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4259 __ lea($tmp$$Register, $mem$$Address); 4260 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4261 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4262 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4263 %} 4264 ins_pipe( pipe_slow ); 4265 %} 4266 4267 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4268 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4269 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4270 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4271 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4272 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4273 ins_encode %{ 4274 int vlen_enc = vector_length_encoding(this); 4275 int vector_len = Matcher::vector_length(this); 4276 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4277 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4278 __ lea($tmp$$Register, $mem$$Address); 4279 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4280 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4281 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4282 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4288 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4289 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4290 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4291 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4292 ins_encode %{ 4293 int vlen_enc = vector_length_encoding(this); 4294 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4295 __ lea($tmp$$Register, $mem$$Address); 4296 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4297 if (elem_bt == T_SHORT) { 4298 __ movl($mask_idx$$Register, 0x55555555); 4299 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4300 } 4301 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4302 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4303 %} 4304 ins_pipe( pipe_slow ); 4305 %} 4306 4307 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4308 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4309 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4310 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4311 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4312 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4313 ins_encode %{ 4314 int vlen_enc = vector_length_encoding(this); 4315 int vector_len = Matcher::vector_length(this); 4316 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4317 __ lea($tmp$$Register, $mem$$Address); 4318 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4319 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4320 if (elem_bt == T_SHORT) { 4321 __ movl($mask_idx$$Register, 0x55555555); 4322 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4323 } 4324 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4325 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4326 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4327 %} 4328 ins_pipe( pipe_slow ); 4329 %} 4330 4331 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4332 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4333 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4334 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4335 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4336 ins_encode %{ 4337 int vlen_enc = vector_length_encoding(this); 4338 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4339 __ lea($tmp$$Register, $mem$$Address); 4340 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4341 if (elem_bt == T_SHORT) { 4342 __ movl($mask_idx$$Register, 0x55555555); 4343 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4344 } 4345 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4346 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4347 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4353 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4354 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4355 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4356 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4357 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first copied to a temporary.
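    // (AVX-512 scatters use the opmask as a completion mask: each mask bit is cleared as the
    // corresponding element is stored, leaving the register all zero on completion. $mask is
    // only an input here and must not be clobbered, so the value is staged in $ktmp and the
    // scatter destroys the copy instead.)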
4412 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4413 __ lea($tmp$$Register, $mem$$Address); 4414 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4415 %} 4416 ins_pipe( pipe_slow ); 4417 %} 4418 4419 // ====================REPLICATE======================================= 4420 4421 // Replicate byte scalar to be vector 4422 instruct vReplB_reg(vec dst, rRegI src) %{ 4423 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4424 match(Set dst (Replicate src)); 4425 format %{ "replicateB $dst,$src" %} 4426 ins_encode %{ 4427 uint vlen = Matcher::vector_length(this); 4428 if (UseAVX >= 2) { 4429 int vlen_enc = vector_length_encoding(this); 4430 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4431 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4432 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4433 } else { 4434 __ movdl($dst$$XMMRegister, $src$$Register); 4435 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4436 } 4437 } else { 4438 assert(UseAVX < 2, ""); 4439 __ movdl($dst$$XMMRegister, $src$$Register); 4440 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4441 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4442 if (vlen >= 16) { 4443 assert(vlen == 16, ""); 4444 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4445 } 4446 } 4447 %} 4448 ins_pipe( pipe_slow ); 4449 %} 4450 4451 instruct ReplB_mem(vec dst, memory mem) %{ 4452 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4453 match(Set dst (Replicate (LoadB mem))); 4454 format %{ "replicateB $dst,$mem" %} 4455 ins_encode %{ 4456 int vlen_enc = vector_length_encoding(this); 4457 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 // ====================ReplicateS======================================= 4463 4464 instruct vReplS_reg(vec dst, rRegI src) %{ 4465 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4466 match(Set dst (Replicate src)); 4467 format %{ "replicateS $dst,$src" %} 4468 ins_encode %{ 4469 uint vlen = Matcher::vector_length(this); 4470 int vlen_enc = vector_length_encoding(this); 4471 if (UseAVX >= 2) { 4472 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4473 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4474 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4475 } else { 4476 __ movdl($dst$$XMMRegister, $src$$Register); 4477 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4478 } 4479 } else { 4480 assert(UseAVX < 2, ""); 4481 __ movdl($dst$$XMMRegister, $src$$Register); 4482 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4483 if (vlen >= 8) { 4484 assert(vlen == 8, ""); 4485 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4486 } 4487 } 4488 %} 4489 ins_pipe( pipe_slow ); 4490 %} 4491 4492 instruct ReplS_mem(vec dst, memory mem) %{ 4493 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4494 match(Set dst (Replicate (LoadS mem))); 4495 format %{ "replicateS $dst,$mem" %} 4496 ins_encode %{ 4497 int vlen_enc = vector_length_encoding(this); 4498 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4499 %} 4500 ins_pipe( pipe_slow ); 4501 %} 4502 4503 // ====================ReplicateI======================================= 4504 4505 instruct ReplI_reg(vec dst, rRegI 
src) %{ 4506 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4507 match(Set dst (Replicate src)); 4508 format %{ "replicateI $dst,$src" %} 4509 ins_encode %{ 4510 uint vlen = Matcher::vector_length(this); 4511 int vlen_enc = vector_length_encoding(this); 4512 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4513 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4514 } else if (VM_Version::supports_avx2()) { 4515 __ movdl($dst$$XMMRegister, $src$$Register); 4516 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4517 } else { 4518 __ movdl($dst$$XMMRegister, $src$$Register); 4519 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4520 } 4521 %} 4522 ins_pipe( pipe_slow ); 4523 %} 4524 4525 instruct ReplI_mem(vec dst, memory mem) %{ 4526 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4527 match(Set dst (Replicate (LoadI mem))); 4528 format %{ "replicateI $dst,$mem" %} 4529 ins_encode %{ 4530 int vlen_enc = vector_length_encoding(this); 4531 if (VM_Version::supports_avx2()) { 4532 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4533 } else if (VM_Version::supports_avx()) { 4534 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4535 } else { 4536 __ movdl($dst$$XMMRegister, $mem$$Address); 4537 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4538 } 4539 %} 4540 ins_pipe( pipe_slow ); 4541 %} 4542 4543 instruct ReplI_imm(vec dst, immI con) %{ 4544 predicate(Matcher::is_non_long_integral_vector(n)); 4545 match(Set dst (Replicate con)); 4546 format %{ "replicateI $dst,$con" %} 4547 ins_encode %{ 4548 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4549 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4550 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4551 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4552 BasicType bt = Matcher::vector_element_basic_type(this); 4553 int vlen = Matcher::vector_length_in_bytes(this); 4554 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4555 %} 4556 ins_pipe( pipe_slow ); 4557 %} 4558 4559 // Replicate scalar zero to be vector 4560 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4561 predicate(Matcher::is_non_long_integral_vector(n)); 4562 match(Set dst (Replicate zero)); 4563 format %{ "replicateI $dst,$zero" %} 4564 ins_encode %{ 4565 int vlen_enc = vector_length_encoding(this); 4566 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4567 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4568 } else { 4569 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4570 } 4571 %} 4572 ins_pipe( fpu_reg_reg ); 4573 %} 4574 4575 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4576 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4577 match(Set dst (Replicate con)); 4578 format %{ "vallones $dst" %} 4579 ins_encode %{ 4580 int vector_len = vector_length_encoding(this); 4581 __ vallones($dst$$XMMRegister, vector_len); 4582 %} 4583 ins_pipe( pipe_slow ); 4584 %} 4585 4586 // ====================ReplicateL======================================= 4587 4588 #ifdef _LP64 4589 // Replicate long (8 byte) scalar to be vector 4590 instruct ReplL_reg(vec dst, rRegL src) %{ 4591 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4592 match(Set dst (Replicate src)); 4593 format %{ "replicateL $dst,$src" %} 4594 ins_encode %{ 4595 int vlen = Matcher::vector_length(this); 4596 int vlen_enc = vector_length_encoding(this); 4597 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4598 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4599 } else if (VM_Version::supports_avx2()) { 4600 __ movdq($dst$$XMMRegister, $src$$Register); 4601 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4602 } else { 4603 __ movdq($dst$$XMMRegister, $src$$Register); 4604 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4605 } 4606 %} 4607 ins_pipe( pipe_slow ); 4608 %} 4609 #else // _LP64 4610 // Replicate long (8 byte) scalar to be vector 4611 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4612 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4613 match(Set dst (Replicate src)); 4614 effect(TEMP dst, USE src, TEMP tmp); 4615 format %{ "replicateL $dst,$src" %} 4616 ins_encode %{ 4617 uint vlen = Matcher::vector_length(this); 4618 if (vlen == 2) { 4619 __ movdl($dst$$XMMRegister, $src$$Register); 4620 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4621 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4622 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4623 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4624 int vlen_enc = Assembler::AVX_256bit; 4625 __ movdl($dst$$XMMRegister, $src$$Register); 4626 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4627 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4628 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4629 } else { 4630 __ movdl($dst$$XMMRegister, $src$$Register); 4631 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4632 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4633 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4634 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4635 
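      // At this point the 64-bit value, reassembled from the two 32-bit halves of $src,
      // occupies both quadwords of the low 128 bits and has also been copied into the upper
      // 128 bits, i.e. all four lanes of the 256-bit destination hold the scalar.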
} 4636 %} 4637 ins_pipe( pipe_slow ); 4638 %} 4639 4640 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4641 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4642 match(Set dst (Replicate src)); 4643 effect(TEMP dst, USE src, TEMP tmp); 4644 format %{ "replicateL $dst,$src" %} 4645 ins_encode %{ 4646 if (VM_Version::supports_avx512vl()) { 4647 __ movdl($dst$$XMMRegister, $src$$Register); 4648 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4649 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4650 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4651 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4652 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4653 } else { 4654 int vlen_enc = Assembler::AVX_512bit; 4655 __ movdl($dst$$XMMRegister, $src$$Register); 4656 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4657 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4658 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4659 } 4660 %} 4661 ins_pipe( pipe_slow ); 4662 %} 4663 #endif // _LP64 4664 4665 instruct ReplL_mem(vec dst, memory mem) %{ 4666 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4667 match(Set dst (Replicate (LoadL mem))); 4668 format %{ "replicateL $dst,$mem" %} 4669 ins_encode %{ 4670 int vlen_enc = vector_length_encoding(this); 4671 if (VM_Version::supports_avx2()) { 4672 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4673 } else if (VM_Version::supports_sse3()) { 4674 __ movddup($dst$$XMMRegister, $mem$$Address); 4675 } else { 4676 __ movq($dst$$XMMRegister, $mem$$Address); 4677 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4678 } 4679 %} 4680 ins_pipe( pipe_slow ); 4681 %} 4682 4683 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
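// (x86 has no vector broadcast that takes a 64-bit immediate, so the bit pattern is placed
// in the constant table via $constantaddress/vreplicate_imm and expanded to the full vector
// width by load_constant_vector. Illustrative example: replicating con == 0x0102030405060708
// into a 32-byte vector leaves four copies of that 8-byte pattern in $dst.)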
4684 instruct ReplL_imm(vec dst, immL con) %{ 4685 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4686 match(Set dst (Replicate con)); 4687 format %{ "replicateL $dst,$con" %} 4688 ins_encode %{ 4689 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4690 int vlen = Matcher::vector_length_in_bytes(this); 4691 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4692 %} 4693 ins_pipe( pipe_slow ); 4694 %} 4695 4696 instruct ReplL_zero(vec dst, immL0 zero) %{ 4697 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4698 match(Set dst (Replicate zero)); 4699 format %{ "replicateL $dst,$zero" %} 4700 ins_encode %{ 4701 int vlen_enc = vector_length_encoding(this); 4702 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4703 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4704 } else { 4705 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4706 } 4707 %} 4708 ins_pipe( fpu_reg_reg ); 4709 %} 4710 4711 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4712 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4713 match(Set dst (Replicate con)); 4714 format %{ "vallones $dst" %} 4715 ins_encode %{ 4716 int vector_len = vector_length_encoding(this); 4717 __ vallones($dst$$XMMRegister, vector_len); 4718 %} 4719 ins_pipe( pipe_slow ); 4720 %} 4721 4722 // ====================ReplicateF======================================= 4723 4724 instruct vReplF_reg(vec dst, vlRegF src) %{ 4725 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4726 match(Set dst (Replicate src)); 4727 format %{ "replicateF $dst,$src" %} 4728 ins_encode %{ 4729 uint vlen = Matcher::vector_length(this); 4730 int vlen_enc = vector_length_encoding(this); 4731 if (vlen <= 4) { 4732 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4733 } else if (VM_Version::supports_avx2()) { 4734 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4735 } else { 4736 assert(vlen == 8, "sanity"); 4737 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4738 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4739 } 4740 %} 4741 ins_pipe( pipe_slow ); 4742 %} 4743 4744 instruct ReplF_reg(vec dst, vlRegF src) %{ 4745 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4746 match(Set dst (Replicate src)); 4747 format %{ "replicateF $dst,$src" %} 4748 ins_encode %{ 4749 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4750 %} 4751 ins_pipe( pipe_slow ); 4752 %} 4753 4754 instruct ReplF_mem(vec dst, memory mem) %{ 4755 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4756 match(Set dst (Replicate (LoadF mem))); 4757 format %{ "replicateF $dst,$mem" %} 4758 ins_encode %{ 4759 int vlen_enc = vector_length_encoding(this); 4760 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4761 %} 4762 ins_pipe( pipe_slow ); 4763 %} 4764 4765 // Replicate float scalar immediate to be vector by loading from const table. 4766 instruct ReplF_imm(vec dst, immF con) %{ 4767 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4768 match(Set dst (Replicate con)); 4769 format %{ "replicateF $dst,$con" %} 4770 ins_encode %{ 4771 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4772 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 2)); 4773 int vlen = Matcher::vector_length_in_bytes(this); 4774 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4775 %} 4776 ins_pipe( pipe_slow ); 4777 %} 4778 4779 instruct ReplF_zero(vec dst, immF0 zero) %{ 4780 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4781 match(Set dst (Replicate zero)); 4782 format %{ "replicateF $dst,$zero" %} 4783 ins_encode %{ 4784 int vlen_enc = vector_length_encoding(this); 4785 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4786 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4787 } else { 4788 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4789 } 4790 %} 4791 ins_pipe( fpu_reg_reg ); 4792 %} 4793 4794 // ====================ReplicateD======================================= 4795 4796 // Replicate double (8 bytes) scalar to be vector 4797 instruct vReplD_reg(vec dst, vlRegD src) %{ 4798 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4799 match(Set dst (Replicate src)); 4800 format %{ "replicateD $dst,$src" %} 4801 ins_encode %{ 4802 uint vlen = Matcher::vector_length(this); 4803 int vlen_enc = vector_length_encoding(this); 4804 if (vlen <= 2) { 4805 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4806 } else if (VM_Version::supports_avx2()) { 4807 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4808 } else { 4809 assert(vlen == 4, "sanity"); 4810 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4811 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4812 } 4813 %} 4814 ins_pipe( pipe_slow ); 4815 %} 4816 4817 instruct ReplD_reg(vec dst, vlRegD src) %{ 4818 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4819 match(Set dst (Replicate src)); 4820 format %{ "replicateD $dst,$src" %} 4821 ins_encode %{ 4822 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4823 %} 4824 ins_pipe( pipe_slow ); 4825 %} 4826 4827 instruct ReplD_mem(vec dst, memory mem) %{ 4828 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4829 match(Set dst (Replicate (LoadD mem))); 4830 format %{ "replicateD $dst,$mem" %} 4831 ins_encode %{ 4832 if (Matcher::vector_length(this) >= 4) { 4833 int vlen_enc = vector_length_encoding(this); 4834 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4835 } else { 4836 __ movddup($dst$$XMMRegister, $mem$$Address); 4837 } 4838 %} 4839 ins_pipe( pipe_slow ); 4840 %} 4841 4842 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
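// (Same approach as ReplL_imm above: there is no broadcast form that accepts a floating-point
// immediate, so the raw 8-byte image of the double goes through the constant table.)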
4843 instruct ReplD_imm(vec dst, immD con) %{ 4844 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4845 match(Set dst (Replicate con)); 4846 format %{ "replicateD $dst,$con" %} 4847 ins_encode %{ 4848 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4849 int vlen = Matcher::vector_length_in_bytes(this); 4850 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4851 %} 4852 ins_pipe( pipe_slow ); 4853 %} 4854 4855 instruct ReplD_zero(vec dst, immD0 zero) %{ 4856 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4857 match(Set dst (Replicate zero)); 4858 format %{ "replicateD $dst,$zero" %} 4859 ins_encode %{ 4860 int vlen_enc = vector_length_encoding(this); 4861 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4862 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4863 } else { 4864 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4865 } 4866 %} 4867 ins_pipe( fpu_reg_reg ); 4868 %} 4869 4870 // ====================VECTOR INSERT======================================= 4871 4872 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4873 predicate(Matcher::vector_length_in_bytes(n) < 32); 4874 match(Set dst (VectorInsert (Binary dst val) idx)); 4875 format %{ "vector_insert $dst,$val,$idx" %} 4876 ins_encode %{ 4877 assert(UseSSE >= 4, "required"); 4878 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4879 4880 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4881 4882 assert(is_integral_type(elem_bt), ""); 4883 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4884 4885 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4886 %} 4887 ins_pipe( pipe_slow ); 4888 %} 4889 4890 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4891 predicate(Matcher::vector_length_in_bytes(n) == 32); 4892 match(Set dst (VectorInsert (Binary src val) idx)); 4893 effect(TEMP vtmp); 4894 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4895 ins_encode %{ 4896 int vlen_enc = Assembler::AVX_256bit; 4897 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4898 int elem_per_lane = 16/type2aelembytes(elem_bt); 4899 int log2epr = log2(elem_per_lane); 4900 4901 assert(is_integral_type(elem_bt), "sanity"); 4902 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4903 4904 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4905 uint y_idx = ($idx$$constant >> log2epr) & 1; 4906 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4907 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4908 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4909 %} 4910 ins_pipe( pipe_slow ); 4911 %} 4912 4913 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4914 predicate(Matcher::vector_length_in_bytes(n) == 64); 4915 match(Set dst (VectorInsert (Binary src val) idx)); 4916 effect(TEMP vtmp); 4917 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4918 ins_encode %{ 4919 assert(UseAVX > 2, "sanity"); 4920 4921 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4922 int elem_per_lane = 16/type2aelembytes(elem_bt); 4923 int log2epr = log2(elem_per_lane); 4924 4925 assert(is_integral_type(elem_bt), ""); 4926 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4927 4928 uint x_idx = $idx$$constant & 
right_n_bits(log2epr); 4929 uint y_idx = ($idx$$constant >> log2epr) & 3; 4930 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4931 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4932 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4933 %} 4934 ins_pipe( pipe_slow ); 4935 %} 4936 4937 #ifdef _LP64 4938 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4939 predicate(Matcher::vector_length(n) == 2); 4940 match(Set dst (VectorInsert (Binary dst val) idx)); 4941 format %{ "vector_insert $dst,$val,$idx" %} 4942 ins_encode %{ 4943 assert(UseSSE >= 4, "required"); 4944 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4945 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4946 4947 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4948 %} 4949 ins_pipe( pipe_slow ); 4950 %} 4951 4952 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4953 predicate(Matcher::vector_length(n) == 4); 4954 match(Set dst (VectorInsert (Binary src val) idx)); 4955 effect(TEMP vtmp); 4956 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4957 ins_encode %{ 4958 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4959 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4960 4961 uint x_idx = $idx$$constant & right_n_bits(1); 4962 uint y_idx = ($idx$$constant >> 1) & 1; 4963 int vlen_enc = Assembler::AVX_256bit; 4964 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4965 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4966 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4967 %} 4968 ins_pipe( pipe_slow ); 4969 %} 4970 4971 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4972 predicate(Matcher::vector_length(n) == 8); 4973 match(Set dst (VectorInsert (Binary src val) idx)); 4974 effect(TEMP vtmp); 4975 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4976 ins_encode %{ 4977 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4978 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4979 4980 uint x_idx = $idx$$constant & right_n_bits(1); 4981 uint y_idx = ($idx$$constant >> 1) & 3; 4982 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4983 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4984 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4985 %} 4986 ins_pipe( pipe_slow ); 4987 %} 4988 #endif 4989 4990 instruct insertF(vec dst, regF val, immU8 idx) %{ 4991 predicate(Matcher::vector_length(n) < 8); 4992 match(Set dst (VectorInsert (Binary dst val) idx)); 4993 format %{ "vector_insert $dst,$val,$idx" %} 4994 ins_encode %{ 4995 assert(UseSSE >= 4, "sanity"); 4996 4997 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4998 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4999 5000 uint x_idx = $idx$$constant & right_n_bits(2); 5001 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5002 %} 5003 ins_pipe( pipe_slow ); 5004 %} 5005 5006 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 5007 predicate(Matcher::vector_length(n) >= 8); 5008 match(Set dst (VectorInsert (Binary src val) idx)); 5009 effect(TEMP vtmp); 5010 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5011 
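  // The constant index is split below into a position within a 128-bit lane (x_idx) and a
  // lane number (y_idx). Worked example (illustrative values only): inserting at idx == 10
  // into a 16-float vector gives x_idx = 10 & 3 = 2 and y_idx = (10 >> 2) & 3 = 2, i.e.
  // element 2 of the third 128-bit lane is extracted, patched and written back.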
ins_encode %{ 5012 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5013 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5014 5015 int vlen = Matcher::vector_length(this); 5016 uint x_idx = $idx$$constant & right_n_bits(2); 5017 if (vlen == 8) { 5018 uint y_idx = ($idx$$constant >> 2) & 1; 5019 int vlen_enc = Assembler::AVX_256bit; 5020 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5021 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5022 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5023 } else { 5024 assert(vlen == 16, "sanity"); 5025 uint y_idx = ($idx$$constant >> 2) & 3; 5026 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5027 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5028 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5029 } 5030 %} 5031 ins_pipe( pipe_slow ); 5032 %} 5033 5034 #ifdef _LP64 5035 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5036 predicate(Matcher::vector_length(n) == 2); 5037 match(Set dst (VectorInsert (Binary dst val) idx)); 5038 effect(TEMP tmp); 5039 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5040 ins_encode %{ 5041 assert(UseSSE >= 4, "sanity"); 5042 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5043 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5044 5045 __ movq($tmp$$Register, $val$$XMMRegister); 5046 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5047 %} 5048 ins_pipe( pipe_slow ); 5049 %} 5050 5051 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5052 predicate(Matcher::vector_length(n) == 4); 5053 match(Set dst (VectorInsert (Binary src val) idx)); 5054 effect(TEMP vtmp, TEMP tmp); 5055 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5056 ins_encode %{ 5057 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5058 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5059 5060 uint x_idx = $idx$$constant & right_n_bits(1); 5061 uint y_idx = ($idx$$constant >> 1) & 1; 5062 int vlen_enc = Assembler::AVX_256bit; 5063 __ movq($tmp$$Register, $val$$XMMRegister); 5064 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5065 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5066 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5067 %} 5068 ins_pipe( pipe_slow ); 5069 %} 5070 5071 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5072 predicate(Matcher::vector_length(n) == 8); 5073 match(Set dst (VectorInsert (Binary src val) idx)); 5074 effect(TEMP tmp, TEMP vtmp); 5075 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5076 ins_encode %{ 5077 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5078 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5079 5080 uint x_idx = $idx$$constant & right_n_bits(1); 5081 uint y_idx = ($idx$$constant >> 1) & 3; 5082 __ movq($tmp$$Register, $val$$XMMRegister); 5083 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5084 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5085 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5086 %} 5087 ins_pipe( pipe_slow 
); 5088 %} 5089 #endif 5090 5091 // ====================REDUCTION ARITHMETIC======================================= 5092 5093 // =======================Int Reduction========================================== 5094 5095 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5096 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 5097 match(Set dst (AddReductionVI src1 src2)); 5098 match(Set dst (MulReductionVI src1 src2)); 5099 match(Set dst (AndReductionV src1 src2)); 5100 match(Set dst ( OrReductionV src1 src2)); 5101 match(Set dst (XorReductionV src1 src2)); 5102 match(Set dst (MinReductionV src1 src2)); 5103 match(Set dst (MaxReductionV src1 src2)); 5104 effect(TEMP vtmp1, TEMP vtmp2); 5105 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5106 ins_encode %{ 5107 int opcode = this->ideal_Opcode(); 5108 int vlen = Matcher::vector_length(this, $src2); 5109 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5110 %} 5111 ins_pipe( pipe_slow ); 5112 %} 5113 5114 // =======================Long Reduction========================================== 5115 5116 #ifdef _LP64 5117 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5118 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5119 match(Set dst (AddReductionVL src1 src2)); 5120 match(Set dst (MulReductionVL src1 src2)); 5121 match(Set dst (AndReductionV src1 src2)); 5122 match(Set dst ( OrReductionV src1 src2)); 5123 match(Set dst (XorReductionV src1 src2)); 5124 match(Set dst (MinReductionV src1 src2)); 5125 match(Set dst (MaxReductionV src1 src2)); 5126 effect(TEMP vtmp1, TEMP vtmp2); 5127 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5128 ins_encode %{ 5129 int opcode = this->ideal_Opcode(); 5130 int vlen = Matcher::vector_length(this, $src2); 5131 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5135 5136 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5137 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5138 match(Set dst (AddReductionVL src1 src2)); 5139 match(Set dst (MulReductionVL src1 src2)); 5140 match(Set dst (AndReductionV src1 src2)); 5141 match(Set dst ( OrReductionV src1 src2)); 5142 match(Set dst (XorReductionV src1 src2)); 5143 match(Set dst (MinReductionV src1 src2)); 5144 match(Set dst (MaxReductionV src1 src2)); 5145 effect(TEMP vtmp1, TEMP vtmp2); 5146 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5147 ins_encode %{ 5148 int opcode = this->ideal_Opcode(); 5149 int vlen = Matcher::vector_length(this, $src2); 5150 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5151 %} 5152 ins_pipe( pipe_slow ); 5153 %} 5154 #endif // _LP64 5155 5156 // =======================Float Reduction========================================== 5157 5158 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5159 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5160 match(Set dst (AddReductionVF dst src)); 5161 match(Set dst (MulReductionVF dst src)); 5162 effect(TEMP dst, TEMP vtmp); 5163 format %{ 
"vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5164 ins_encode %{ 5165 int opcode = this->ideal_Opcode(); 5166 int vlen = Matcher::vector_length(this, $src); 5167 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5168 %} 5169 ins_pipe( pipe_slow ); 5170 %} 5171 5172 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5173 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5174 match(Set dst (AddReductionVF dst src)); 5175 match(Set dst (MulReductionVF dst src)); 5176 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5177 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5178 ins_encode %{ 5179 int opcode = this->ideal_Opcode(); 5180 int vlen = Matcher::vector_length(this, $src); 5181 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5182 %} 5183 ins_pipe( pipe_slow ); 5184 %} 5185 5186 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5187 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5188 match(Set dst (AddReductionVF dst src)); 5189 match(Set dst (MulReductionVF dst src)); 5190 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5191 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5192 ins_encode %{ 5193 int opcode = this->ideal_Opcode(); 5194 int vlen = Matcher::vector_length(this, $src); 5195 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5196 %} 5197 ins_pipe( pipe_slow ); 5198 %} 5199 5200 5201 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5202 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5203 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5204 // src1 contains reduction identity 5205 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5206 match(Set dst (AddReductionVF src1 src2)); 5207 match(Set dst (MulReductionVF src1 src2)); 5208 effect(TEMP dst); 5209 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5210 ins_encode %{ 5211 int opcode = this->ideal_Opcode(); 5212 int vlen = Matcher::vector_length(this, $src2); 5213 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5214 %} 5215 ins_pipe( pipe_slow ); 5216 %} 5217 5218 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5219 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5220 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5221 // src1 contains reduction identity 5222 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5223 match(Set dst (AddReductionVF src1 src2)); 5224 match(Set dst (MulReductionVF src1 src2)); 5225 effect(TEMP dst, TEMP vtmp); 5226 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5227 ins_encode %{ 5228 int opcode = this->ideal_Opcode(); 5229 int vlen = Matcher::vector_length(this, $src2); 5230 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5231 %} 5232 ins_pipe( pipe_slow ); 5233 %} 5234 5235 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5236 // Non-strictly ordered floating-point add/mul reduction for floats. 
This rule is 5237 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5238 // src1 contains reduction identity 5239 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5240 match(Set dst (AddReductionVF src1 src2)); 5241 match(Set dst (MulReductionVF src1 src2)); 5242 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5243 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5244 ins_encode %{ 5245 int opcode = this->ideal_Opcode(); 5246 int vlen = Matcher::vector_length(this, $src2); 5247 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5248 %} 5249 ins_pipe( pipe_slow ); 5250 %} 5251 5252 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5253 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5254 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5255 // src1 contains reduction identity 5256 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5257 match(Set dst (AddReductionVF src1 src2)); 5258 match(Set dst (MulReductionVF src1 src2)); 5259 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5260 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5261 ins_encode %{ 5262 int opcode = this->ideal_Opcode(); 5263 int vlen = Matcher::vector_length(this, $src2); 5264 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5265 %} 5266 ins_pipe( pipe_slow ); 5267 %} 5268 5269 // =======================Double Reduction========================================== 5270 5271 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5272 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5273 match(Set dst (AddReductionVD dst src)); 5274 match(Set dst (MulReductionVD dst src)); 5275 effect(TEMP dst, TEMP vtmp); 5276 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5277 ins_encode %{ 5278 int opcode = this->ideal_Opcode(); 5279 int vlen = Matcher::vector_length(this, $src); 5280 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5281 %} 5282 ins_pipe( pipe_slow ); 5283 %} 5284 5285 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5286 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5287 match(Set dst (AddReductionVD dst src)); 5288 match(Set dst (MulReductionVD dst src)); 5289 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5290 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5291 ins_encode %{ 5292 int opcode = this->ideal_Opcode(); 5293 int vlen = Matcher::vector_length(this, $src); 5294 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5295 %} 5296 ins_pipe( pipe_slow ); 5297 %} 5298 5299 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5300 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5301 match(Set dst (AddReductionVD dst src)); 5302 match(Set dst (MulReductionVD dst src)); 5303 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5304 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5305 ins_encode %{ 5306 
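    // Strict-order variant: $dst carries the incoming scalar and the lanes of $src are
    // folded into it in index order, preserving the evaluation order required when the
    // reduction node demands strict ordering (e.g. auto-vectorized Java FP loops).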
int opcode = this->ideal_Opcode(); 5307 int vlen = Matcher::vector_length(this, $src); 5308 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5309 %} 5310 ins_pipe( pipe_slow ); 5311 %} 5312 5313 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5314 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5315 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5316 // src1 contains reduction identity 5317 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5318 match(Set dst (AddReductionVD src1 src2)); 5319 match(Set dst (MulReductionVD src1 src2)); 5320 effect(TEMP dst); 5321 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5322 ins_encode %{ 5323 int opcode = this->ideal_Opcode(); 5324 int vlen = Matcher::vector_length(this, $src2); 5325 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5326 %} 5327 ins_pipe( pipe_slow ); 5328 %} 5329 5330 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5331 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5332 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5333 // src1 contains reduction identity 5334 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5335 match(Set dst (AddReductionVD src1 src2)); 5336 match(Set dst (MulReductionVD src1 src2)); 5337 effect(TEMP dst, TEMP vtmp); 5338 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5339 ins_encode %{ 5340 int opcode = this->ideal_Opcode(); 5341 int vlen = Matcher::vector_length(this, $src2); 5342 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5343 %} 5344 ins_pipe( pipe_slow ); 5345 %} 5346 5347 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5348 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5349 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
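  // Relaxed ordering permits pairwise (tree) folding of the lanes, shortening the dependency
  // chain from one operation per lane to roughly log2(vlen) vector-wide combining steps.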
5350 // src1 contains reduction identity 5351 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5352 match(Set dst (AddReductionVD src1 src2)); 5353 match(Set dst (MulReductionVD src1 src2)); 5354 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5355 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5356 ins_encode %{ 5357 int opcode = this->ideal_Opcode(); 5358 int vlen = Matcher::vector_length(this, $src2); 5359 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5360 %} 5361 ins_pipe( pipe_slow ); 5362 %} 5363 5364 // =======================Byte Reduction========================================== 5365 5366 #ifdef _LP64 5367 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5368 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5369 match(Set dst (AddReductionVI src1 src2)); 5370 match(Set dst (AndReductionV src1 src2)); 5371 match(Set dst ( OrReductionV src1 src2)); 5372 match(Set dst (XorReductionV src1 src2)); 5373 match(Set dst (MinReductionV src1 src2)); 5374 match(Set dst (MaxReductionV src1 src2)); 5375 effect(TEMP vtmp1, TEMP vtmp2); 5376 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5377 ins_encode %{ 5378 int opcode = this->ideal_Opcode(); 5379 int vlen = Matcher::vector_length(this, $src2); 5380 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5381 %} 5382 ins_pipe( pipe_slow ); 5383 %} 5384 5385 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5386 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5387 match(Set dst (AddReductionVI src1 src2)); 5388 match(Set dst (AndReductionV src1 src2)); 5389 match(Set dst ( OrReductionV src1 src2)); 5390 match(Set dst (XorReductionV src1 src2)); 5391 match(Set dst (MinReductionV src1 src2)); 5392 match(Set dst (MaxReductionV src1 src2)); 5393 effect(TEMP vtmp1, TEMP vtmp2); 5394 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5395 ins_encode %{ 5396 int opcode = this->ideal_Opcode(); 5397 int vlen = Matcher::vector_length(this, $src2); 5398 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5399 %} 5400 ins_pipe( pipe_slow ); 5401 %} 5402 #endif 5403 5404 // =======================Short Reduction========================================== 5405 5406 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5407 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5408 match(Set dst (AddReductionVI src1 src2)); 5409 match(Set dst (MulReductionVI src1 src2)); 5410 match(Set dst (AndReductionV src1 src2)); 5411 match(Set dst ( OrReductionV src1 src2)); 5412 match(Set dst (XorReductionV src1 src2)); 5413 match(Set dst (MinReductionV src1 src2)); 5414 match(Set dst (MaxReductionV src1 src2)); 5415 effect(TEMP vtmp1, TEMP vtmp2); 5416 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5417 ins_encode %{ 5418 int opcode = this->ideal_Opcode(); 5419 int vlen = Matcher::vector_length(this, $src2); 5420 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 
5421 %} 5422 ins_pipe( pipe_slow ); 5423 %} 5424 5425 // =======================Mul Reduction========================================== 5426 5427 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5428 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5429 Matcher::vector_length(n->in(2)) <= 32); // src2 5430 match(Set dst (MulReductionVI src1 src2)); 5431 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5432 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5433 ins_encode %{ 5434 int opcode = this->ideal_Opcode(); 5435 int vlen = Matcher::vector_length(this, $src2); 5436 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5437 %} 5438 ins_pipe( pipe_slow ); 5439 %} 5440 5441 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5442 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5443 Matcher::vector_length(n->in(2)) == 64); // src2 5444 match(Set dst (MulReductionVI src1 src2)); 5445 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5446 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5447 ins_encode %{ 5448 int opcode = this->ideal_Opcode(); 5449 int vlen = Matcher::vector_length(this, $src2); 5450 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5451 %} 5452 ins_pipe( pipe_slow ); 5453 %} 5454 5455 //--------------------Min/Max Float Reduction -------------------- 5456 // Float Min Reduction 5457 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5458 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5459 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5460 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5461 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5462 Matcher::vector_length(n->in(2)) == 2); 5463 match(Set dst (MinReductionV src1 src2)); 5464 match(Set dst (MaxReductionV src1 src2)); 5465 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5466 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5467 ins_encode %{ 5468 assert(UseAVX > 0, "sanity"); 5469 5470 int opcode = this->ideal_Opcode(); 5471 int vlen = Matcher::vector_length(this, $src2); 5472 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5473 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5474 %} 5475 ins_pipe( pipe_slow ); 5476 %} 5477 5478 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5479 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5480 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5481 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5482 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5483 Matcher::vector_length(n->in(2)) >= 4); 5484 match(Set dst (MinReductionV src1 src2)); 5485 match(Set dst (MaxReductionV src1 src2)); 5486 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5487 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5488 ins_encode %{ 5489 assert(UseAVX > 0, "sanity"); 5490 5491 int opcode = 
this->ideal_Opcode(); 5492 int vlen = Matcher::vector_length(this, $src2); 5493 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5494 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5500 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5501 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5502 Matcher::vector_length(n->in(2)) == 2); 5503 match(Set dst (MinReductionV dst src)); 5504 match(Set dst (MaxReductionV dst src)); 5505 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5506 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5507 ins_encode %{ 5508 assert(UseAVX > 0, "sanity"); 5509 5510 int opcode = this->ideal_Opcode(); 5511 int vlen = Matcher::vector_length(this, $src); 5512 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5513 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5514 %} 5515 ins_pipe( pipe_slow ); 5516 %} 5517 5518 5519 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5520 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5521 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5522 Matcher::vector_length(n->in(2)) >= 4); 5523 match(Set dst (MinReductionV dst src)); 5524 match(Set dst (MaxReductionV dst src)); 5525 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5526 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5527 ins_encode %{ 5528 assert(UseAVX > 0, "sanity"); 5529 5530 int opcode = this->ideal_Opcode(); 5531 int vlen = Matcher::vector_length(this, $src); 5532 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5533 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5534 %} 5535 ins_pipe( pipe_slow ); 5536 %} 5537 5538 5539 //--------------------Min Double Reduction -------------------- 5540 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5541 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5542 rFlagsReg cr) %{ 5543 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5544 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5545 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5546 Matcher::vector_length(n->in(2)) == 2); 5547 match(Set dst (MinReductionV src1 src2)); 5548 match(Set dst (MaxReductionV src1 src2)); 5549 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5550 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5551 ins_encode %{ 5552 assert(UseAVX > 0, "sanity"); 5553 5554 int opcode = this->ideal_Opcode(); 5555 int vlen = Matcher::vector_length(this, $src2); 5556 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5557 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5558 %} 5559 ins_pipe( pipe_slow ); 5560 %} 5561 5562 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5563 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5564 rFlagsReg cr) %{ 5565 
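  // The predicate accepts this rule only when the scalar input is the identity of the
  // operation (+Inf for min, -Inf for max); src1 can then be ignored and the result is
  // computed from the lanes of src2 alone.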
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5566 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5567 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5568 Matcher::vector_length(n->in(2)) >= 4); 5569 match(Set dst (MinReductionV src1 src2)); 5570 match(Set dst (MaxReductionV src1 src2)); 5571 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5572 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5573 ins_encode %{ 5574 assert(UseAVX > 0, "sanity"); 5575 5576 int opcode = this->ideal_Opcode(); 5577 int vlen = Matcher::vector_length(this, $src2); 5578 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5579 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5580 %} 5581 ins_pipe( pipe_slow ); 5582 %} 5583 5584 5585 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5586 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5587 rFlagsReg cr) %{ 5588 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5589 Matcher::vector_length(n->in(2)) == 2); 5590 match(Set dst (MinReductionV dst src)); 5591 match(Set dst (MaxReductionV dst src)); 5592 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5593 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5594 ins_encode %{ 5595 assert(UseAVX > 0, "sanity"); 5596 5597 int opcode = this->ideal_Opcode(); 5598 int vlen = Matcher::vector_length(this, $src); 5599 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5600 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5601 %} 5602 ins_pipe( pipe_slow ); 5603 %} 5604 5605 instruct minmax_reductionD_av(legRegD dst, legVec src, 5606 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5607 rFlagsReg cr) %{ 5608 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5609 Matcher::vector_length(n->in(2)) >= 4); 5610 match(Set dst (MinReductionV dst src)); 5611 match(Set dst (MaxReductionV dst src)); 5612 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5613 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5614 ins_encode %{ 5615 assert(UseAVX > 0, "sanity"); 5616 5617 int opcode = this->ideal_Opcode(); 5618 int vlen = Matcher::vector_length(this, $src); 5619 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5620 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 // ====================VECTOR ARITHMETIC======================================= 5626 5627 // --------------------------------- ADD -------------------------------------- 5628 5629 // Bytes vector add 5630 instruct vaddB(vec dst, vec src) %{ 5631 predicate(UseAVX == 0); 5632 match(Set dst (AddVB dst src)); 5633 format %{ "paddb $dst,$src\t! add packedB" %} 5634 ins_encode %{ 5635 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5636 %} 5637 ins_pipe( pipe_slow ); 5638 %} 5639 5640 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5641 predicate(UseAVX > 0); 5642 match(Set dst (AddVB src1 src2)); 5643 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5644 ins_encode %{ 5645 int vlen_enc = vector_length_encoding(this); 5646 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5647 %} 5648 ins_pipe( pipe_slow ); 5649 %} 5650 5651 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5652 predicate((UseAVX > 0) && 5653 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5654 match(Set dst (AddVB src (LoadVector mem))); 5655 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5656 ins_encode %{ 5657 int vlen_enc = vector_length_encoding(this); 5658 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5659 %} 5660 ins_pipe( pipe_slow ); 5661 %} 5662 5663 // Shorts/Chars vector add 5664 instruct vaddS(vec dst, vec src) %{ 5665 predicate(UseAVX == 0); 5666 match(Set dst (AddVS dst src)); 5667 format %{ "paddw $dst,$src\t! add packedS" %} 5668 ins_encode %{ 5669 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5670 %} 5671 ins_pipe( pipe_slow ); 5672 %} 5673 5674 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5675 predicate(UseAVX > 0); 5676 match(Set dst (AddVS src1 src2)); 5677 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5678 ins_encode %{ 5679 int vlen_enc = vector_length_encoding(this); 5680 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5681 %} 5682 ins_pipe( pipe_slow ); 5683 %} 5684 5685 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5686 predicate((UseAVX > 0) && 5687 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5688 match(Set dst (AddVS src (LoadVector mem))); 5689 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5690 ins_encode %{ 5691 int vlen_enc = vector_length_encoding(this); 5692 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5693 %} 5694 ins_pipe( pipe_slow ); 5695 %} 5696 5697 // Integers vector add 5698 instruct vaddI(vec dst, vec src) %{ 5699 predicate(UseAVX == 0); 5700 match(Set dst (AddVI dst src)); 5701 format %{ "paddd $dst,$src\t! add packedI" %} 5702 ins_encode %{ 5703 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5704 %} 5705 ins_pipe( pipe_slow ); 5706 %} 5707 5708 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5709 predicate(UseAVX > 0); 5710 match(Set dst (AddVI src1 src2)); 5711 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5712 ins_encode %{ 5713 int vlen_enc = vector_length_encoding(this); 5714 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5715 %} 5716 ins_pipe( pipe_slow ); 5717 %} 5718 5719 5720 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5721 predicate((UseAVX > 0) && 5722 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5723 match(Set dst (AddVI src (LoadVector mem))); 5724 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5725 ins_encode %{ 5726 int vlen_enc = vector_length_encoding(this); 5727 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 // Longs vector add 5733 instruct vaddL(vec dst, vec src) %{ 5734 predicate(UseAVX == 0); 5735 match(Set dst (AddVL dst src)); 5736 format %{ "paddq $dst,$src\t! add packedL" %} 5737 ins_encode %{ 5738 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5739 %} 5740 ins_pipe( pipe_slow ); 5741 %} 5742 5743 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5744 predicate(UseAVX > 0); 5745 match(Set dst (AddVL src1 src2)); 5746 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5747 ins_encode %{ 5748 int vlen_enc = vector_length_encoding(this); 5749 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5750 %} 5751 ins_pipe( pipe_slow ); 5752 %} 5753 5754 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5755 predicate((UseAVX > 0) && 5756 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5757 match(Set dst (AddVL src (LoadVector mem))); 5758 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5759 ins_encode %{ 5760 int vlen_enc = vector_length_encoding(this); 5761 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5762 %} 5763 ins_pipe( pipe_slow ); 5764 %} 5765 5766 // Floats vector add 5767 instruct vaddF(vec dst, vec src) %{ 5768 predicate(UseAVX == 0); 5769 match(Set dst (AddVF dst src)); 5770 format %{ "addps $dst,$src\t! add packedF" %} 5771 ins_encode %{ 5772 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5773 %} 5774 ins_pipe( pipe_slow ); 5775 %} 5776 5777 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5778 predicate(UseAVX > 0); 5779 match(Set dst (AddVF src1 src2)); 5780 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5781 ins_encode %{ 5782 int vlen_enc = vector_length_encoding(this); 5783 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5784 %} 5785 ins_pipe( pipe_slow ); 5786 %} 5787 5788 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5789 predicate((UseAVX > 0) && 5790 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5791 match(Set dst (AddVF src (LoadVector mem))); 5792 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5793 ins_encode %{ 5794 int vlen_enc = vector_length_encoding(this); 5795 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5796 %} 5797 ins_pipe( pipe_slow ); 5798 %} 5799 5800 // Doubles vector add 5801 instruct vaddD(vec dst, vec src) %{ 5802 predicate(UseAVX == 0); 5803 match(Set dst (AddVD dst src)); 5804 format %{ "addpd $dst,$src\t! add packedD" %} 5805 ins_encode %{ 5806 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5807 %} 5808 ins_pipe( pipe_slow ); 5809 %} 5810 5811 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5812 predicate(UseAVX > 0); 5813 match(Set dst (AddVD src1 src2)); 5814 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5815 ins_encode %{ 5816 int vlen_enc = vector_length_encoding(this); 5817 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5818 %} 5819 ins_pipe( pipe_slow ); 5820 %} 5821 5822 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5823 predicate((UseAVX > 0) && 5824 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5825 match(Set dst (AddVD src (LoadVector mem))); 5826 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5827 ins_encode %{ 5828 int vlen_enc = vector_length_encoding(this); 5829 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5830 %} 5831 ins_pipe( pipe_slow ); 5832 %} 5833 5834 // --------------------------------- SUB -------------------------------------- 5835 5836 // Bytes vector sub 5837 instruct vsubB(vec dst, vec src) %{ 5838 predicate(UseAVX == 0); 5839 match(Set dst (SubVB dst src)); 5840 format %{ "psubb $dst,$src\t! sub packedB" %} 5841 ins_encode %{ 5842 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5843 %} 5844 ins_pipe( pipe_slow ); 5845 %} 5846 5847 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5848 predicate(UseAVX > 0); 5849 match(Set dst (SubVB src1 src2)); 5850 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5851 ins_encode %{ 5852 int vlen_enc = vector_length_encoding(this); 5853 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5859 predicate((UseAVX > 0) && 5860 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5861 match(Set dst (SubVB src (LoadVector mem))); 5862 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5863 ins_encode %{ 5864 int vlen_enc = vector_length_encoding(this); 5865 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5866 %} 5867 ins_pipe( pipe_slow ); 5868 %} 5869 5870 // Shorts/Chars vector sub 5871 instruct vsubS(vec dst, vec src) %{ 5872 predicate(UseAVX == 0); 5873 match(Set dst (SubVS dst src)); 5874 format %{ "psubw $dst,$src\t! sub packedS" %} 5875 ins_encode %{ 5876 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5877 %} 5878 ins_pipe( pipe_slow ); 5879 %} 5880 5881 5882 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5883 predicate(UseAVX > 0); 5884 match(Set dst (SubVS src1 src2)); 5885 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5886 ins_encode %{ 5887 int vlen_enc = vector_length_encoding(this); 5888 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5889 %} 5890 ins_pipe( pipe_slow ); 5891 %} 5892 5893 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5894 predicate((UseAVX > 0) && 5895 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5896 match(Set dst (SubVS src (LoadVector mem))); 5897 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5898 ins_encode %{ 5899 int vlen_enc = vector_length_encoding(this); 5900 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5901 %} 5902 ins_pipe( pipe_slow ); 5903 %} 5904 5905 // Integers vector sub 5906 instruct vsubI(vec dst, vec src) %{ 5907 predicate(UseAVX == 0); 5908 match(Set dst (SubVI dst src)); 5909 format %{ "psubd $dst,$src\t! sub packedI" %} 5910 ins_encode %{ 5911 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5912 %} 5913 ins_pipe( pipe_slow ); 5914 %} 5915 5916 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5917 predicate(UseAVX > 0); 5918 match(Set dst (SubVI src1 src2)); 5919 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5920 ins_encode %{ 5921 int vlen_enc = vector_length_encoding(this); 5922 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5923 %} 5924 ins_pipe( pipe_slow ); 5925 %} 5926 5927 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5928 predicate((UseAVX > 0) && 5929 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5930 match(Set dst (SubVI src (LoadVector mem))); 5931 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5932 ins_encode %{ 5933 int vlen_enc = vector_length_encoding(this); 5934 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5935 %} 5936 ins_pipe( pipe_slow ); 5937 %} 5938 5939 // Longs vector sub 5940 instruct vsubL(vec dst, vec src) %{ 5941 predicate(UseAVX == 0); 5942 match(Set dst (SubVL dst src)); 5943 format %{ "psubq $dst,$src\t! sub packedL" %} 5944 ins_encode %{ 5945 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5946 %} 5947 ins_pipe( pipe_slow ); 5948 %} 5949 5950 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5951 predicate(UseAVX > 0); 5952 match(Set dst (SubVL src1 src2)); 5953 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5954 ins_encode %{ 5955 int vlen_enc = vector_length_encoding(this); 5956 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5957 %} 5958 ins_pipe( pipe_slow ); 5959 %} 5960 5961 5962 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5963 predicate((UseAVX > 0) && 5964 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5965 match(Set dst (SubVL src (LoadVector mem))); 5966 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5967 ins_encode %{ 5968 int vlen_enc = vector_length_encoding(this); 5969 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5970 %} 5971 ins_pipe( pipe_slow ); 5972 %} 5973 5974 // Floats vector sub 5975 instruct vsubF(vec dst, vec src) %{ 5976 predicate(UseAVX == 0); 5977 match(Set dst (SubVF dst src)); 5978 format %{ "subps $dst,$src\t! sub packedF" %} 5979 ins_encode %{ 5980 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5981 %} 5982 ins_pipe( pipe_slow ); 5983 %} 5984 5985 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5986 predicate(UseAVX > 0); 5987 match(Set dst (SubVF src1 src2)); 5988 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5989 ins_encode %{ 5990 int vlen_enc = vector_length_encoding(this); 5991 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5992 %} 5993 ins_pipe( pipe_slow ); 5994 %} 5995 5996 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5997 predicate((UseAVX > 0) && 5998 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5999 match(Set dst (SubVF src (LoadVector mem))); 6000 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6001 ins_encode %{ 6002 int vlen_enc = vector_length_encoding(this); 6003 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6004 %} 6005 ins_pipe( pipe_slow ); 6006 %} 6007 6008 // Doubles vector sub 6009 instruct vsubD(vec dst, vec src) %{ 6010 predicate(UseAVX == 0); 6011 match(Set dst (SubVD dst src)); 6012 format %{ "subpd $dst,$src\t! sub packedD" %} 6013 ins_encode %{ 6014 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6015 %} 6016 ins_pipe( pipe_slow ); 6017 %} 6018 6019 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6020 predicate(UseAVX > 0); 6021 match(Set dst (SubVD src1 src2)); 6022 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6023 ins_encode %{ 6024 int vlen_enc = vector_length_encoding(this); 6025 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6026 %} 6027 ins_pipe( pipe_slow ); 6028 %} 6029 6030 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6031 predicate((UseAVX > 0) && 6032 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6033 match(Set dst (SubVD src (LoadVector mem))); 6034 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6035 ins_encode %{ 6036 int vlen_enc = vector_length_encoding(this); 6037 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6038 %} 6039 ins_pipe( pipe_slow ); 6040 %} 6041 6042 // --------------------------------- MUL -------------------------------------- 6043 6044 // Byte vector mul 6045 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6046 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6047 match(Set dst (MulVB src1 src2)); 6048 effect(TEMP dst, TEMP xtmp); 6049 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6050 ins_encode %{ 6051 assert(UseSSE > 3, "required"); 6052 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6053 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6054 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6055 __ psllw($dst$$XMMRegister, 8); 6056 __ psrlw($dst$$XMMRegister, 8); 6057 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6058 %} 6059 ins_pipe( pipe_slow ); 6060 %} 6061 6062 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6063 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6064 match(Set dst (MulVB src1 src2)); 6065 effect(TEMP dst, TEMP xtmp); 6066 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6067 ins_encode %{ 6068 assert(UseSSE > 3, "required"); 6069 // Odd-index elements 6070 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6071 __ psrlw($dst$$XMMRegister, 8); 6072 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6073 __ psrlw($xtmp$$XMMRegister, 8); 6074 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6075 __ psllw($dst$$XMMRegister, 8); 6076 // Even-index elements 6077 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6078 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6079 __ psllw($xtmp$$XMMRegister, 8); 6080 __ psrlw($xtmp$$XMMRegister, 8); 6081 // Combine 6082 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6083 %} 6084 ins_pipe( pipe_slow ); 6085 %} 6086 6087 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6088 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6089 match(Set dst (MulVB src1 src2)); 6090 effect(TEMP xtmp1, TEMP xtmp2); 6091 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6092 ins_encode %{ 6093 int vlen_enc = vector_length_encoding(this); 6094 // Odd-index elements 6095 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6096 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6097 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6098 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6099 // Even-index elements 6100 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6101 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6102 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6103 // Combine 6104 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6105 %} 6106 ins_pipe( pipe_slow ); 6107 %} 6108 6109 // Shorts/Chars vector mul 6110 instruct vmulS(vec dst, vec src) %{ 6111 predicate(UseAVX == 0); 6112 match(Set dst (MulVS dst src)); 6113 format %{ "pmullw $dst,$src\t! mul packedS" %} 6114 ins_encode %{ 6115 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6116 %} 6117 ins_pipe( pipe_slow ); 6118 %} 6119 6120 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6121 predicate(UseAVX > 0); 6122 match(Set dst (MulVS src1 src2)); 6123 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6124 ins_encode %{ 6125 int vlen_enc = vector_length_encoding(this); 6126 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6127 %} 6128 ins_pipe( pipe_slow ); 6129 %} 6130 6131 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6132 predicate((UseAVX > 0) && 6133 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6134 match(Set dst (MulVS src (LoadVector mem))); 6135 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6136 ins_encode %{ 6137 int vlen_enc = vector_length_encoding(this); 6138 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6139 %} 6140 ins_pipe( pipe_slow ); 6141 %} 6142 6143 // Integers vector mul 6144 instruct vmulI(vec dst, vec src) %{ 6145 predicate(UseAVX == 0); 6146 match(Set dst (MulVI dst src)); 6147 format %{ "pmulld $dst,$src\t! mul packedI" %} 6148 ins_encode %{ 6149 assert(UseSSE > 3, "required"); 6150 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6156 predicate(UseAVX > 0); 6157 match(Set dst (MulVI src1 src2)); 6158 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6159 ins_encode %{ 6160 int vlen_enc = vector_length_encoding(this); 6161 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6162 %} 6163 ins_pipe( pipe_slow ); 6164 %} 6165 6166 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6167 predicate((UseAVX > 0) && 6168 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6169 match(Set dst (MulVI src (LoadVector mem))); 6170 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6171 ins_encode %{ 6172 int vlen_enc = vector_length_encoding(this); 6173 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6174 %} 6175 ins_pipe( pipe_slow ); 6176 %} 6177 6178 // Longs vector mul 6179 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6180 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6181 VM_Version::supports_avx512dq()) || 6182 VM_Version::supports_avx512vldq()); 6183 match(Set dst (MulVL src1 src2)); 6184 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6185 ins_encode %{ 6186 assert(UseAVX > 2, "required"); 6187 int vlen_enc = vector_length_encoding(this); 6188 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6189 %} 6190 ins_pipe( pipe_slow ); 6191 %} 6192 6193 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6194 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6195 VM_Version::supports_avx512dq()) || 6196 (Matcher::vector_length_in_bytes(n) > 8 && 6197 VM_Version::supports_avx512vldq())); 6198 match(Set dst (MulVL src (LoadVector mem))); 6199 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6200 ins_encode %{ 6201 assert(UseAVX > 2, "required"); 6202 int vlen_enc = vector_length_encoding(this); 6203 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6204 %} 6205 ins_pipe( pipe_slow ); 6206 %} 6207 6208 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6209 predicate(UseAVX == 0); 6210 match(Set dst (MulVL src1 src2)); 6211 effect(TEMP dst, TEMP xtmp); 6212 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6213 ins_encode %{ 6214 assert(VM_Version::supports_sse4_1(), "required"); 6215 // Get the lo-hi products, only the lower 32 bits is in concerns 6216 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6217 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6218 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6219 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6220 __ psllq($dst$$XMMRegister, 32); 6221 // Get the lo-lo products 6222 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6223 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6224 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6225 %} 6226 ins_pipe( pipe_slow ); 6227 %} 6228 6229 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6230 predicate(UseAVX > 0 && 6231 ((Matcher::vector_length_in_bytes(n) == 64 && 6232 !VM_Version::supports_avx512dq()) || 6233 (Matcher::vector_length_in_bytes(n) < 64 && 6234 !VM_Version::supports_avx512vldq()))); 6235 match(Set dst (MulVL src1 src2)); 6236 effect(TEMP xtmp1, TEMP xtmp2); 6237 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6238 ins_encode %{ 6239 int vlen_enc = vector_length_encoding(this); 6240 // Get the lo-hi products, only the lower 32 bits is in concerns 6241 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6242 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6243 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6244 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6245 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6246 // Get the lo-lo products 6247 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6248 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6249 %} 6250 ins_pipe( pipe_slow ); 6251 %} 6252 6253 // Floats vector mul 6254 instruct vmulF(vec dst, vec src) %{ 6255 predicate(UseAVX == 0); 6256 match(Set dst (MulVF dst src)); 6257 format %{ "mulps $dst,$src\t! mul packedF" %} 6258 ins_encode %{ 6259 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6260 %} 6261 ins_pipe( pipe_slow ); 6262 %} 6263 6264 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6265 predicate(UseAVX > 0); 6266 match(Set dst (MulVF src1 src2)); 6267 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 6268 ins_encode %{ 6269 int vlen_enc = vector_length_encoding(this); 6270 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6271 %} 6272 ins_pipe( pipe_slow ); 6273 %} 6274 6275 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6276 predicate((UseAVX > 0) && 6277 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6278 match(Set dst (MulVF src (LoadVector mem))); 6279 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6280 ins_encode %{ 6281 int vlen_enc = vector_length_encoding(this); 6282 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6283 %} 6284 ins_pipe( pipe_slow ); 6285 %} 6286 6287 // Doubles vector mul 6288 instruct vmulD(vec dst, vec src) %{ 6289 predicate(UseAVX == 0); 6290 match(Set dst (MulVD dst src)); 6291 format %{ "mulpd $dst,$src\t! mul packedD" %} 6292 ins_encode %{ 6293 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6294 %} 6295 ins_pipe( pipe_slow ); 6296 %} 6297 6298 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6299 predicate(UseAVX > 0); 6300 match(Set dst (MulVD src1 src2)); 6301 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 6302 ins_encode %{ 6303 int vlen_enc = vector_length_encoding(this); 6304 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6305 %} 6306 ins_pipe( pipe_slow ); 6307 %} 6308 6309 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6310 predicate((UseAVX > 0) && 6311 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6312 match(Set dst (MulVD src (LoadVector mem))); 6313 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6314 ins_encode %{ 6315 int vlen_enc = vector_length_encoding(this); 6316 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6317 %} 6318 ins_pipe( pipe_slow ); 6319 %} 6320 6321 // --------------------------------- DIV -------------------------------------- 6322 6323 // Floats vector div 6324 instruct vdivF(vec dst, vec src) %{ 6325 predicate(UseAVX == 0); 6326 match(Set dst (DivVF dst src)); 6327 format %{ "divps $dst,$src\t! div packedF" %} 6328 ins_encode %{ 6329 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6330 %} 6331 ins_pipe( pipe_slow ); 6332 %} 6333 6334 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6335 predicate(UseAVX > 0); 6336 match(Set dst (DivVF src1 src2)); 6337 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6338 ins_encode %{ 6339 int vlen_enc = vector_length_encoding(this); 6340 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6341 %} 6342 ins_pipe( pipe_slow ); 6343 %} 6344 6345 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6346 predicate((UseAVX > 0) && 6347 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6348 match(Set dst (DivVF src (LoadVector mem))); 6349 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6350 ins_encode %{ 6351 int vlen_enc = vector_length_encoding(this); 6352 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6353 %} 6354 ins_pipe( pipe_slow ); 6355 %} 6356 6357 // Doubles vector div 6358 instruct vdivD(vec dst, vec src) %{ 6359 predicate(UseAVX == 0); 6360 match(Set dst (DivVD dst src)); 6361 format %{ "divpd $dst,$src\t! div packedD" %} 6362 ins_encode %{ 6363 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6364 %} 6365 ins_pipe( pipe_slow ); 6366 %} 6367 6368 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6369 predicate(UseAVX > 0); 6370 match(Set dst (DivVD src1 src2)); 6371 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6372 ins_encode %{ 6373 int vlen_enc = vector_length_encoding(this); 6374 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6375 %} 6376 ins_pipe( pipe_slow ); 6377 %} 6378 6379 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6380 predicate((UseAVX > 0) && 6381 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6382 match(Set dst (DivVD src (LoadVector mem))); 6383 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6384 ins_encode %{ 6385 int vlen_enc = vector_length_encoding(this); 6386 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6387 %} 6388 ins_pipe( pipe_slow ); 6389 %} 6390 6391 // ------------------------------ MinMax --------------------------------------- 6392 6393 // Byte, Short, Int vector Min/Max 6394 instruct minmax_reg_sse(vec dst, vec src) %{ 6395 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6396 UseAVX == 0); 6397 match(Set dst (MinV dst src)); 6398 match(Set dst (MaxV dst src)); 6399 format %{ "vector_minmax $dst,$src\t! 
" %} 6400 ins_encode %{ 6401 assert(UseSSE >= 4, "required"); 6402 6403 int opcode = this->ideal_Opcode(); 6404 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6405 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6406 %} 6407 ins_pipe( pipe_slow ); 6408 %} 6409 6410 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6411 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6412 UseAVX > 0); 6413 match(Set dst (MinV src1 src2)); 6414 match(Set dst (MaxV src1 src2)); 6415 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6416 ins_encode %{ 6417 int opcode = this->ideal_Opcode(); 6418 int vlen_enc = vector_length_encoding(this); 6419 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6420 6421 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6422 %} 6423 ins_pipe( pipe_slow ); 6424 %} 6425 6426 // Long vector Min/Max 6427 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6428 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6429 UseAVX == 0); 6430 match(Set dst (MinV dst src)); 6431 match(Set dst (MaxV src dst)); 6432 effect(TEMP dst, TEMP tmp); 6433 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6434 ins_encode %{ 6435 assert(UseSSE >= 4, "required"); 6436 6437 int opcode = this->ideal_Opcode(); 6438 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6439 assert(elem_bt == T_LONG, "sanity"); 6440 6441 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6442 %} 6443 ins_pipe( pipe_slow ); 6444 %} 6445 6446 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6447 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6448 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6449 match(Set dst (MinV src1 src2)); 6450 match(Set dst (MaxV src1 src2)); 6451 effect(TEMP dst); 6452 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6453 ins_encode %{ 6454 int vlen_enc = vector_length_encoding(this); 6455 int opcode = this->ideal_Opcode(); 6456 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6457 assert(elem_bt == T_LONG, "sanity"); 6458 6459 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6460 %} 6461 ins_pipe( pipe_slow ); 6462 %} 6463 6464 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6465 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6466 Matcher::vector_element_basic_type(n) == T_LONG); 6467 match(Set dst (MinV src1 src2)); 6468 match(Set dst (MaxV src1 src2)); 6469 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6470 ins_encode %{ 6471 assert(UseAVX > 2, "required"); 6472 6473 int vlen_enc = vector_length_encoding(this); 6474 int opcode = this->ideal_Opcode(); 6475 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6476 assert(elem_bt == T_LONG, "sanity"); 6477 6478 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6479 %} 6480 ins_pipe( pipe_slow ); 6481 %} 6482 6483 // Float/Double vector Min/Max 6484 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6485 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6486 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6487 UseAVX > 0); 6488 match(Set dst (MinV a b)); 6489 match(Set dst (MaxV a b)); 6490 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6491 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6492 ins_encode %{ 6493 assert(UseAVX > 0, "required"); 6494 6495 int opcode = this->ideal_Opcode(); 6496 int vlen_enc = vector_length_encoding(this); 6497 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6498 6499 __ vminmax_fp(opcode, elem_bt, 6500 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6501 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6502 %} 6503 ins_pipe( pipe_slow ); 6504 %} 6505 6506 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6507 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6508 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6509 match(Set dst (MinV a b)); 6510 match(Set dst (MaxV a b)); 6511 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6512 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6513 ins_encode %{ 6514 assert(UseAVX > 2, "required"); 6515 6516 int opcode = this->ideal_Opcode(); 6517 int vlen_enc = vector_length_encoding(this); 6518 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6519 6520 __ evminmax_fp(opcode, elem_bt, 6521 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6522 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6523 %} 6524 ins_pipe( pipe_slow ); 6525 %} 6526 6527 // --------------------------------- Signum/CopySign --------------------------- 6528 6529 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6530 match(Set dst (SignumF dst (Binary zero one))); 6531 effect(KILL cr); 6532 format %{ "signumF $dst, $dst" %} 6533 ins_encode %{ 6534 int opcode = this->ideal_Opcode(); 6535 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6536 %} 6537 ins_pipe( pipe_slow ); 6538 %} 6539 6540 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6541 match(Set dst (SignumD dst (Binary zero one))); 6542 effect(KILL cr); 6543 format %{ "signumD $dst, $dst" %} 6544 ins_encode %{ 6545 int opcode = this->ideal_Opcode(); 6546 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6547 %} 6548 ins_pipe( pipe_slow ); 6549 %} 6550 6551 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6552 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6553 match(Set dst (SignumVF src (Binary zero one))); 6554 match(Set dst (SignumVD src (Binary zero one))); 6555 effect(TEMP dst, TEMP xtmp1); 6556 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6557 ins_encode %{ 6558 int opcode = this->ideal_Opcode(); 6559 int vec_enc = vector_length_encoding(this); 6560 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6561 $xtmp1$$XMMRegister, vec_enc); 6562 %} 6563 ins_pipe( pipe_slow ); 6564 %} 6565 6566 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6567 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6568 match(Set dst (SignumVF src (Binary zero one))); 6569 match(Set dst (SignumVD src (Binary zero one))); 6570 effect(TEMP dst, TEMP ktmp1); 6571 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6572 ins_encode %{ 6573 int opcode = this->ideal_Opcode(); 6574 int vec_enc = vector_length_encoding(this); 6575 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6576 $ktmp1$$KRegister, vec_enc); 6577 %} 6578 ins_pipe( pipe_slow ); 6579 %} 6580 6581 // --------------------------------------- 6582 // For copySign use 0xE4 as writemask for vpternlog 6583 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6584 // C (xmm2) is set to 0x7FFFFFFF 6585 // Wherever xmm2 is 0, we want to pick from B (sign) 6586 // Wherever xmm2 is 1, we want to pick from A (src) 6587 // 6588 // A B C Result 6589 // 0 0 0 0 6590 // 0 0 1 0 6591 // 0 1 0 1 6592 // 0 1 1 0 6593 // 1 0 0 0 6594 // 1 0 1 1 6595 // 1 1 0 1 6596 // 1 1 1 1 6597 // 6598 // Result going from high bit to low bit is 0x11100100 = 0xe4 6599 // --------------------------------------- 6600 6601 #ifdef _LP64 6602 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6603 match(Set dst (CopySignF dst src)); 6604 effect(TEMP tmp1, TEMP tmp2); 6605 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6606 ins_encode %{ 6607 __ movl($tmp2$$Register, 0x7FFFFFFF); 6608 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6609 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6610 %} 6611 ins_pipe( pipe_slow ); 6612 %} 6613 6614 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6615 match(Set dst (CopySignD dst (Binary src zero))); 6616 ins_cost(100); 6617 effect(TEMP tmp1, TEMP tmp2); 6618 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6619 ins_encode %{ 6620 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6621 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6622 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6623 %} 6624 ins_pipe( pipe_slow ); 6625 %} 6626 6627 #endif // _LP64 6628 6629 //----------------------------- CompressBits/ExpandBits ------------------------ 6630 6631 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6632 predicate(n->bottom_type()->isa_int()); 6633 match(Set dst (CompressBits src mask)); 6634 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6635 ins_encode %{ 6636 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6637 %} 6638 ins_pipe( pipe_slow ); 6639 %} 6640 6641 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6642 predicate(n->bottom_type()->isa_int()); 6643 match(Set dst (ExpandBits src mask)); 6644 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6645 ins_encode %{ 6646 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6647 %} 6648 ins_pipe( pipe_slow ); 6649 %} 6650 6651 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6652 predicate(n->bottom_type()->isa_int()); 6653 match(Set dst (CompressBits src (LoadI mask))); 6654 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6655 ins_encode %{ 6656 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6657 %} 6658 ins_pipe( pipe_slow ); 6659 %} 6660 6661 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6662 predicate(n->bottom_type()->isa_int()); 6663 match(Set dst (ExpandBits src (LoadI mask))); 6664 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6665 ins_encode %{ 6666 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6667 %} 6668 ins_pipe( pipe_slow ); 6669 %} 6670 6671 // --------------------------------- Sqrt -------------------------------------- 6672 6673 instruct vsqrtF_reg(vec dst, vec src) %{ 6674 match(Set dst (SqrtVF src)); 6675 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6676 ins_encode %{ 6677 assert(UseAVX > 0, "required"); 6678 int vlen_enc = vector_length_encoding(this); 6679 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6680 %} 6681 ins_pipe( pipe_slow ); 6682 %} 6683 6684 instruct vsqrtF_mem(vec dst, memory mem) %{ 6685 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6686 match(Set dst (SqrtVF (LoadVector mem))); 6687 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6688 ins_encode %{ 6689 assert(UseAVX > 0, "required"); 6690 int vlen_enc = vector_length_encoding(this); 6691 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6692 %} 6693 ins_pipe( pipe_slow ); 6694 %} 6695 6696 // Floating point vector sqrt 6697 instruct vsqrtD_reg(vec dst, vec src) %{ 6698 match(Set dst (SqrtVD src)); 6699 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6700 ins_encode %{ 6701 assert(UseAVX > 0, "required"); 6702 int vlen_enc = vector_length_encoding(this); 6703 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6704 %} 6705 ins_pipe( pipe_slow ); 6706 %} 6707 6708 instruct vsqrtD_mem(vec dst, memory mem) %{ 6709 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6710 match(Set dst (SqrtVD (LoadVector mem))); 6711 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6712 ins_encode %{ 6713 assert(UseAVX > 0, "required"); 6714 int vlen_enc = vector_length_encoding(this); 6715 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6716 %} 6717 ins_pipe( pipe_slow ); 6718 %} 6719 6720 // ------------------------------ Shift --------------------------------------- 6721 6722 // Left and right shift count vectors are the same on x86 6723 // (only lowest bits of xmm reg are used for count). 6724 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6725 match(Set dst (LShiftCntV cnt)); 6726 match(Set dst (RShiftCntV cnt)); 6727 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6728 ins_encode %{ 6729 __ movdl($dst$$XMMRegister, $cnt$$Register); 6730 %} 6731 ins_pipe( pipe_slow ); 6732 %} 6733 6734 // Byte vector shift 6735 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6736 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6737 match(Set dst ( LShiftVB src shift)); 6738 match(Set dst ( RShiftVB src shift)); 6739 match(Set dst (URShiftVB src shift)); 6740 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6741 format %{"vector_byte_shift $dst,$src,$shift" %} 6742 ins_encode %{ 6743 assert(UseSSE > 3, "required"); 6744 int opcode = this->ideal_Opcode(); 6745 bool sign = (opcode != Op_URShiftVB); 6746 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6747 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6748 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6749 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6750 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6751 %} 6752 ins_pipe( pipe_slow ); 6753 %} 6754 6755 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6756 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6757 UseAVX <= 1); 6758 match(Set dst ( LShiftVB src shift)); 6759 match(Set dst ( RShiftVB src shift)); 6760 match(Set dst (URShiftVB src shift)); 6761 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6762 format %{"vector_byte_shift $dst,$src,$shift" %} 6763 ins_encode %{ 6764 assert(UseSSE > 3, "required"); 6765 int opcode = this->ideal_Opcode(); 6766 bool sign = (opcode != Op_URShiftVB); 6767 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6768 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6769 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6770 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6771 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6772 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6773 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6774 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6775 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6781 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6782 UseAVX > 1); 6783 match(Set dst ( LShiftVB src shift)); 6784 match(Set dst ( RShiftVB src shift)); 6785 match(Set dst (URShiftVB src shift)); 6786 effect(TEMP dst, TEMP tmp); 6787 format %{"vector_byte_shift $dst,$src,$shift" %} 6788 ins_encode %{ 6789 int opcode = this->ideal_Opcode(); 6790 bool sign = (opcode != Op_URShiftVB); 6791 int vlen_enc = Assembler::AVX_256bit; 6792 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6793 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6794 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6795 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6796 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6797 %} 6798 ins_pipe( pipe_slow ); 6799 %} 6800 6801 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6802 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6803 match(Set dst ( LShiftVB src shift)); 6804 match(Set dst ( RShiftVB src shift)); 6805 match(Set dst (URShiftVB src shift)); 6806 effect(TEMP 
dst, TEMP tmp); 6807 format %{"vector_byte_shift $dst,$src,$shift" %} 6808 ins_encode %{ 6809 assert(UseAVX > 1, "required"); 6810 int opcode = this->ideal_Opcode(); 6811 bool sign = (opcode != Op_URShiftVB); 6812 int vlen_enc = Assembler::AVX_256bit; 6813 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6814 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6815 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6816 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6817 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6818 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6819 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6820 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6821 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6822 %} 6823 ins_pipe( pipe_slow ); 6824 %} 6825 6826 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6827 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6828 match(Set dst ( LShiftVB src shift)); 6829 match(Set dst (RShiftVB src shift)); 6830 match(Set dst (URShiftVB src shift)); 6831 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6832 format %{"vector_byte_shift $dst,$src,$shift" %} 6833 ins_encode %{ 6834 assert(UseAVX > 2, "required"); 6835 int opcode = this->ideal_Opcode(); 6836 bool sign = (opcode != Op_URShiftVB); 6837 int vlen_enc = Assembler::AVX_512bit; 6838 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6839 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6840 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6841 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6842 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6843 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6844 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6845 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6846 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6847 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6848 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6849 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6850 %} 6851 ins_pipe( pipe_slow ); 6852 %} 6853 6854 // Shorts vector logical right shift produces an incorrect Java result 6855 // for negative data because Java code converts a short value into an int with 6856 // sign extension before a shift. But char vectors are fine since chars are 6857 // unsigned values. 6858 // Shorts/Chars vector shift 6859 instruct vshiftS(vec dst, vec src, vec shift) %{ 6860 predicate(!n->as_ShiftV()->is_var_shift()); 6861 match(Set dst ( LShiftVS src shift)); 6862 match(Set dst ( RShiftVS src shift)); 6863 match(Set dst (URShiftVS src shift)); 6864 effect(TEMP dst, USE src, USE shift); 6865 format %{ "vshiftw $dst,$src,$shift\t! 
shift packedS" %} 6866 ins_encode %{ 6867 int opcode = this->ideal_Opcode(); 6868 if (UseAVX > 0) { 6869 int vlen_enc = vector_length_encoding(this); 6870 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6871 } else { 6872 int vlen = Matcher::vector_length(this); 6873 if (vlen == 2) { 6874 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6875 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6876 } else if (vlen == 4) { 6877 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6878 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6879 } else { 6880 assert (vlen == 8, "sanity"); 6881 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6882 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6883 } 6884 } 6885 %} 6886 ins_pipe( pipe_slow ); 6887 %} 6888 6889 // Integers vector left shift 6890 instruct vshiftI(vec dst, vec src, vec shift) %{ 6891 predicate(!n->as_ShiftV()->is_var_shift()); 6892 match(Set dst ( LShiftVI src shift)); 6893 match(Set dst ( RShiftVI src shift)); 6894 match(Set dst (URShiftVI src shift)); 6895 effect(TEMP dst, USE src, USE shift); 6896 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6897 ins_encode %{ 6898 int opcode = this->ideal_Opcode(); 6899 if (UseAVX > 0) { 6900 int vlen_enc = vector_length_encoding(this); 6901 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6902 } else { 6903 int vlen = Matcher::vector_length(this); 6904 if (vlen == 2) { 6905 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6906 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6907 } else { 6908 assert(vlen == 4, "sanity"); 6909 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6910 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6911 } 6912 } 6913 %} 6914 ins_pipe( pipe_slow ); 6915 %} 6916 6917 // Integers vector left constant shift 6918 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6919 match(Set dst (LShiftVI src (LShiftCntV shift))); 6920 match(Set dst (RShiftVI src (RShiftCntV shift))); 6921 match(Set dst (URShiftVI src (RShiftCntV shift))); 6922 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6923 ins_encode %{ 6924 int opcode = this->ideal_Opcode(); 6925 if (UseAVX > 0) { 6926 int vector_len = vector_length_encoding(this); 6927 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6928 } else { 6929 int vlen = Matcher::vector_length(this); 6930 if (vlen == 2) { 6931 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6932 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6933 } else { 6934 assert(vlen == 4, "sanity"); 6935 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6936 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6937 } 6938 } 6939 %} 6940 ins_pipe( pipe_slow ); 6941 %} 6942 6943 // Longs vector shift 6944 instruct vshiftL(vec dst, vec src, vec shift) %{ 6945 predicate(!n->as_ShiftV()->is_var_shift()); 6946 match(Set dst ( LShiftVL src shift)); 6947 match(Set dst (URShiftVL src shift)); 6948 effect(TEMP dst, USE src, USE shift); 6949 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6950 ins_encode %{ 6951 int opcode = this->ideal_Opcode(); 6952 if (UseAVX > 0) { 6953 int vlen_enc = vector_length_encoding(this); 6954 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6955 } else { 6956 assert(Matcher::vector_length(this) == 2, ""); 6957 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6958 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6959 } 6960 %} 6961 ins_pipe( pipe_slow ); 6962 %} 6963 6964 // Longs vector constant shift 6965 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6966 match(Set dst (LShiftVL src (LShiftCntV shift))); 6967 match(Set dst (URShiftVL src (RShiftCntV shift))); 6968 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6969 ins_encode %{ 6970 int opcode = this->ideal_Opcode(); 6971 if (UseAVX > 0) { 6972 int vector_len = vector_length_encoding(this); 6973 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6974 } else { 6975 assert(Matcher::vector_length(this) == 2, ""); 6976 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6977 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6978 } 6979 %} 6980 ins_pipe( pipe_slow ); 6981 %} 6982 6983 // -------------------ArithmeticRightShift ----------------------------------- 6984 // Long vector arithmetic right shift 6985 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6986 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6987 match(Set dst (RShiftVL src shift)); 6988 effect(TEMP dst, TEMP tmp); 6989 format %{ "vshiftq $dst,$src,$shift" %} 6990 ins_encode %{ 6991 uint vlen = Matcher::vector_length(this); 6992 if (vlen == 2) { 6993 assert(UseSSE >= 2, "required"); 6994 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6995 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6996 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6997 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6998 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6999 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7000 } else { 7001 assert(vlen == 4, "sanity"); 7002 assert(UseAVX > 1, "required"); 7003 int vlen_enc = Assembler::AVX_256bit; 7004 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7005 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7006 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7007 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7008 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7009 } 7010 %} 7011 ins_pipe( pipe_slow ); 7012 %} 7013 7014 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7015 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7016 match(Set dst (RShiftVL src shift)); 7017 format %{ "vshiftq $dst,$src,$shift" %} 7018 ins_encode %{ 7019 int vlen_enc = vector_length_encoding(this); 7020 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7021 %} 7022 ins_pipe( pipe_slow ); 7023 %} 7024 7025 // ------------------- Variable Shift ----------------------------- 7026 // Byte variable shift 7027 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7028 predicate(Matcher::vector_length(n) <= 8 && 7029 n->as_ShiftV()->is_var_shift() && 7030 !VM_Version::supports_avx512bw()); 7031 match(Set dst ( LShiftVB src shift)); 7032 match(Set dst ( RShiftVB src shift)); 7033 match(Set dst (URShiftVB src shift)); 
7034 effect(TEMP dst, TEMP vtmp); 7035 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7036 ins_encode %{ 7037 assert(UseAVX >= 2, "required"); 7038 7039 int opcode = this->ideal_Opcode(); 7040 int vlen_enc = Assembler::AVX_128bit; 7041 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7042 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7043 %} 7044 ins_pipe( pipe_slow ); 7045 %} 7046 7047 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7048 predicate(Matcher::vector_length(n) == 16 && 7049 n->as_ShiftV()->is_var_shift() && 7050 !VM_Version::supports_avx512bw()); 7051 match(Set dst ( LShiftVB src shift)); 7052 match(Set dst ( RShiftVB src shift)); 7053 match(Set dst (URShiftVB src shift)); 7054 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7055 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7056 ins_encode %{ 7057 assert(UseAVX >= 2, "required"); 7058 7059 int opcode = this->ideal_Opcode(); 7060 int vlen_enc = Assembler::AVX_128bit; 7061 // Shift lower half and get word result in dst 7062 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7063 7064 // Shift upper half and get word result in vtmp1 7065 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7066 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7067 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7068 7069 // Merge and down convert the two word results to byte in dst 7070 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7071 %} 7072 ins_pipe( pipe_slow ); 7073 %} 7074 7075 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7076 predicate(Matcher::vector_length(n) == 32 && 7077 n->as_ShiftV()->is_var_shift() && 7078 !VM_Version::supports_avx512bw()); 7079 match(Set dst ( LShiftVB src shift)); 7080 match(Set dst ( RShiftVB src shift)); 7081 match(Set dst (URShiftVB src shift)); 7082 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7083 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7084 ins_encode %{ 7085 assert(UseAVX >= 2, "required"); 7086 7087 int opcode = this->ideal_Opcode(); 7088 int vlen_enc = Assembler::AVX_128bit; 7089 // Process lower 128 bits and get result in dst 7090 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7091 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7092 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7093 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7094 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7095 7096 // Process higher 128 bits and get result in vtmp3 7097 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7098 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7099 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7100 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7101 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7102 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7103 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7104 7105 // Merge the two results in dst 7106 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7107 %} 7108 ins_pipe( pipe_slow ); 7109 %} 7110 7111 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7112 predicate(Matcher::vector_length(n) <= 32 && 7113 n->as_ShiftV()->is_var_shift() && 7114 VM_Version::supports_avx512bw()); 7115 match(Set dst ( LShiftVB src shift)); 7116 match(Set dst ( RShiftVB src shift)); 7117 match(Set dst (URShiftVB src shift)); 7118 effect(TEMP dst, TEMP vtmp); 7119 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7120 ins_encode %{ 7121 assert(UseAVX > 2, "required"); 7122 7123 int opcode = this->ideal_Opcode(); 7124 int vlen_enc = vector_length_encoding(this); 7125 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7131 predicate(Matcher::vector_length(n) == 64 && 7132 n->as_ShiftV()->is_var_shift() && 7133 VM_Version::supports_avx512bw()); 7134 match(Set dst ( LShiftVB src shift)); 7135 match(Set dst ( RShiftVB src shift)); 7136 match(Set dst (URShiftVB src shift)); 7137 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7138 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7139 ins_encode %{ 7140 assert(UseAVX > 2, "required"); 7141 7142 int opcode = this->ideal_Opcode(); 7143 int vlen_enc = Assembler::AVX_256bit; 7144 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7145 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7146 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7147 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7148 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7149 %} 7150 ins_pipe( pipe_slow ); 7151 %} 7152 7153 // Short variable shift 7154 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7155 predicate(Matcher::vector_length(n) <= 8 && 7156 n->as_ShiftV()->is_var_shift() && 7157 !VM_Version::supports_avx512bw()); 7158 match(Set dst ( LShiftVS src shift)); 7159 match(Set dst ( RShiftVS src shift)); 7160 match(Set dst (URShiftVS src shift)); 7161 effect(TEMP dst, TEMP vtmp); 7162 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7163 ins_encode %{ 7164 assert(UseAVX >= 2, "required"); 7165 7166 int opcode = this->ideal_Opcode(); 7167 bool sign = (opcode != Op_URShiftVS); 7168 int vlen_enc = Assembler::AVX_256bit; 7169 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7170 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7171 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7172 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7173 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7174 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7175 %} 7176 ins_pipe( pipe_slow ); 7177 %} 7178 7179 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7180 predicate(Matcher::vector_length(n) == 16 && 
7181 n->as_ShiftV()->is_var_shift() && 7182 !VM_Version::supports_avx512bw()); 7183 match(Set dst ( LShiftVS src shift)); 7184 match(Set dst ( RShiftVS src shift)); 7185 match(Set dst (URShiftVS src shift)); 7186 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7187 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7188 ins_encode %{ 7189 assert(UseAVX >= 2, "required"); 7190 7191 int opcode = this->ideal_Opcode(); 7192 bool sign = (opcode != Op_URShiftVS); 7193 int vlen_enc = Assembler::AVX_256bit; 7194 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7195 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7196 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7197 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7198 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7199 7200 // Shift upper half, with result in dst using vtmp1 as TEMP 7201 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7202 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7203 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7204 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7205 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7206 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7207 7208 // Merge lower and upper half result into dst 7209 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7210 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7211 %} 7212 ins_pipe( pipe_slow ); 7213 %} 7214 7215 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7216 predicate(n->as_ShiftV()->is_var_shift() && 7217 VM_Version::supports_avx512bw()); 7218 match(Set dst ( LShiftVS src shift)); 7219 match(Set dst ( RShiftVS src shift)); 7220 match(Set dst (URShiftVS src shift)); 7221 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7222 ins_encode %{ 7223 assert(UseAVX > 2, "required"); 7224 7225 int opcode = this->ideal_Opcode(); 7226 int vlen_enc = vector_length_encoding(this); 7227 if (!VM_Version::supports_avx512vl()) { 7228 vlen_enc = Assembler::AVX_512bit; 7229 } 7230 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7231 %} 7232 ins_pipe( pipe_slow ); 7233 %} 7234 7235 //Integer variable shift 7236 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7237 predicate(n->as_ShiftV()->is_var_shift()); 7238 match(Set dst ( LShiftVI src shift)); 7239 match(Set dst ( RShiftVI src shift)); 7240 match(Set dst (URShiftVI src shift)); 7241 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7242 ins_encode %{ 7243 assert(UseAVX >= 2, "required"); 7244 7245 int opcode = this->ideal_Opcode(); 7246 int vlen_enc = vector_length_encoding(this); 7247 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7248 %} 7249 ins_pipe( pipe_slow ); 7250 %} 7251 7252 //Long variable shift 7253 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7254 predicate(n->as_ShiftV()->is_var_shift()); 7255 match(Set dst ( LShiftVL src shift)); 7256 match(Set dst (URShiftVL src shift)); 7257 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 7258 ins_encode %{ 7259 assert(UseAVX >= 2, "required"); 7260 7261 int opcode = this->ideal_Opcode(); 7262 int vlen_enc = vector_length_encoding(this); 7263 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7264 %} 7265 ins_pipe( pipe_slow ); 7266 %} 7267 7268 //Long variable right shift arithmetic 7269 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7270 predicate(Matcher::vector_length(n) <= 4 && 7271 n->as_ShiftV()->is_var_shift() && 7272 UseAVX == 2); 7273 match(Set dst (RShiftVL src shift)); 7274 effect(TEMP dst, TEMP vtmp); 7275 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 7276 ins_encode %{ 7277 int opcode = this->ideal_Opcode(); 7278 int vlen_enc = vector_length_encoding(this); 7279 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7280 $vtmp$$XMMRegister); 7281 %} 7282 ins_pipe( pipe_slow ); 7283 %} 7284 7285 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7286 predicate(n->as_ShiftV()->is_var_shift() && 7287 UseAVX > 2); 7288 match(Set dst (RShiftVL src shift)); 7289 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7290 ins_encode %{ 7291 int opcode = this->ideal_Opcode(); 7292 int vlen_enc = vector_length_encoding(this); 7293 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7294 %} 7295 ins_pipe( pipe_slow ); 7296 %} 7297 7298 // --------------------------------- AND -------------------------------------- 7299 7300 instruct vand(vec dst, vec src) %{ 7301 predicate(UseAVX == 0); 7302 match(Set dst (AndV dst src)); 7303 format %{ "pand $dst,$src\t! and vectors" %} 7304 ins_encode %{ 7305 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7306 %} 7307 ins_pipe( pipe_slow ); 7308 %} 7309 7310 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7311 predicate(UseAVX > 0); 7312 match(Set dst (AndV src1 src2)); 7313 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7314 ins_encode %{ 7315 int vlen_enc = vector_length_encoding(this); 7316 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7317 %} 7318 ins_pipe( pipe_slow ); 7319 %} 7320 7321 instruct vand_mem(vec dst, vec src, memory mem) %{ 7322 predicate((UseAVX > 0) && 7323 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7324 match(Set dst (AndV src (LoadVector mem))); 7325 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7326 ins_encode %{ 7327 int vlen_enc = vector_length_encoding(this); 7328 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7329 %} 7330 ins_pipe( pipe_slow ); 7331 %} 7332 7333 // --------------------------------- OR --------------------------------------- 7334 7335 instruct vor(vec dst, vec src) %{ 7336 predicate(UseAVX == 0); 7337 match(Set dst (OrV dst src)); 7338 format %{ "por $dst,$src\t! or vectors" %} 7339 ins_encode %{ 7340 __ por($dst$$XMMRegister, $src$$XMMRegister); 7341 %} 7342 ins_pipe( pipe_slow ); 7343 %} 7344 7345 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7346 predicate(UseAVX > 0); 7347 match(Set dst (OrV src1 src2)); 7348 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 7349 ins_encode %{ 7350 int vlen_enc = vector_length_encoding(this); 7351 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7352 %} 7353 ins_pipe( pipe_slow ); 7354 %} 7355 7356 instruct vor_mem(vec dst, vec src, memory mem) %{ 7357 predicate((UseAVX > 0) && 7358 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7359 match(Set dst (OrV src (LoadVector mem))); 7360 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7361 ins_encode %{ 7362 int vlen_enc = vector_length_encoding(this); 7363 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7364 %} 7365 ins_pipe( pipe_slow ); 7366 %} 7367 7368 // --------------------------------- XOR -------------------------------------- 7369 7370 instruct vxor(vec dst, vec src) %{ 7371 predicate(UseAVX == 0); 7372 match(Set dst (XorV dst src)); 7373 format %{ "pxor $dst,$src\t! xor vectors" %} 7374 ins_encode %{ 7375 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7376 %} 7377 ins_pipe( pipe_slow ); 7378 %} 7379 7380 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7381 predicate(UseAVX > 0); 7382 match(Set dst (XorV src1 src2)); 7383 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7384 ins_encode %{ 7385 int vlen_enc = vector_length_encoding(this); 7386 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7387 %} 7388 ins_pipe( pipe_slow ); 7389 %} 7390 7391 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7392 predicate((UseAVX > 0) && 7393 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7394 match(Set dst (XorV src (LoadVector mem))); 7395 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7396 ins_encode %{ 7397 int vlen_enc = vector_length_encoding(this); 7398 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7399 %} 7400 ins_pipe( pipe_slow ); 7401 %} 7402 7403 // --------------------------------- VectorCast -------------------------------------- 7404 7405 instruct vcastBtoX(vec dst, vec src) %{ 7406 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7407 match(Set dst (VectorCastB2X src)); 7408 format %{ "vector_cast_b2x $dst,$src\t!" %} 7409 ins_encode %{ 7410 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7411 int vlen_enc = vector_length_encoding(this); 7412 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7413 %} 7414 ins_pipe( pipe_slow ); 7415 %} 7416 7417 instruct vcastBtoD(legVec dst, legVec src) %{ 7418 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7419 match(Set dst (VectorCastB2X src)); 7420 format %{ "vector_cast_b2x $dst,$src\t!" 
%} 7421 ins_encode %{ 7422 int vlen_enc = vector_length_encoding(this); 7423 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7424 %} 7425 ins_pipe( pipe_slow ); 7426 %} 7427 7428 instruct castStoX(vec dst, vec src) %{ 7429 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7430 Matcher::vector_length(n->in(1)) <= 8 && // src 7431 Matcher::vector_element_basic_type(n) == T_BYTE); 7432 match(Set dst (VectorCastS2X src)); 7433 format %{ "vector_cast_s2x $dst,$src" %} 7434 ins_encode %{ 7435 assert(UseAVX > 0, "required"); 7436 7437 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7438 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7439 %} 7440 ins_pipe( pipe_slow ); 7441 %} 7442 7443 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7444 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7445 Matcher::vector_length(n->in(1)) == 16 && // src 7446 Matcher::vector_element_basic_type(n) == T_BYTE); 7447 effect(TEMP dst, TEMP vtmp); 7448 match(Set dst (VectorCastS2X src)); 7449 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} 7450 ins_encode %{ 7451 assert(UseAVX > 0, "required"); 7452 7453 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7454 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7455 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7456 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7457 %} 7458 ins_pipe( pipe_slow ); 7459 %} 7460 7461 instruct vcastStoX_evex(vec dst, vec src) %{ 7462 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7463 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7464 match(Set dst (VectorCastS2X src)); 7465 format %{ "vector_cast_s2x $dst,$src\t!" %} 7466 ins_encode %{ 7467 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7468 int src_vlen_enc = vector_length_encoding(this, $src); 7469 int vlen_enc = vector_length_encoding(this); 7470 switch (to_elem_bt) { 7471 case T_BYTE: 7472 if (!VM_Version::supports_avx512vl()) { 7473 vlen_enc = Assembler::AVX_512bit; 7474 } 7475 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7476 break; 7477 case T_INT: 7478 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7479 break; 7480 case T_FLOAT: 7481 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7482 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7483 break; 7484 case T_LONG: 7485 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7486 break; 7487 case T_DOUBLE: { 7488 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? 
Assembler::AVX_256bit : Assembler::AVX_128bit; 7489 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7490 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7491 break; 7492 } 7493 default: 7494 ShouldNotReachHere(); 7495 } 7496 %} 7497 ins_pipe( pipe_slow ); 7498 %} 7499 7500 instruct castItoX(vec dst, vec src) %{ 7501 predicate(UseAVX <= 2 && 7502 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7503 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7504 match(Set dst (VectorCastI2X src)); 7505 format %{ "vector_cast_i2x $dst,$src" %} 7506 ins_encode %{ 7507 assert(UseAVX > 0, "required"); 7508 7509 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7510 int vlen_enc = vector_length_encoding(this, $src); 7511 7512 if (to_elem_bt == T_BYTE) { 7513 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7514 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7515 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7516 } else { 7517 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7518 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7519 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7520 } 7521 %} 7522 ins_pipe( pipe_slow ); 7523 %} 7524 7525 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7526 predicate(UseAVX <= 2 && 7527 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7528 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7529 match(Set dst (VectorCastI2X src)); 7530 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} 7531 effect(TEMP dst, TEMP vtmp); 7532 ins_encode %{ 7533 assert(UseAVX > 0, "required"); 7534 7535 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7536 int vlen_enc = vector_length_encoding(this, $src); 7537 7538 if (to_elem_bt == T_BYTE) { 7539 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7540 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7541 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7542 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7543 } else { 7544 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7545 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7546 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7547 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7548 } 7549 %} 7550 ins_pipe( pipe_slow ); 7551 %} 7552 7553 instruct vcastItoX_evex(vec dst, vec src) %{ 7554 predicate(UseAVX > 2 || 7555 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7556 match(Set dst (VectorCastI2X src)); 7557 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7558 ins_encode %{ 7559 assert(UseAVX > 0, "required"); 7560 7561 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7562 int src_vlen_enc = vector_length_encoding(this, $src); 7563 int dst_vlen_enc = vector_length_encoding(this); 7564 switch (dst_elem_bt) { 7565 case T_BYTE: 7566 if (!VM_Version::supports_avx512vl()) { 7567 src_vlen_enc = Assembler::AVX_512bit; 7568 } 7569 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7570 break; 7571 case T_SHORT: 7572 if (!VM_Version::supports_avx512vl()) { 7573 src_vlen_enc = Assembler::AVX_512bit; 7574 } 7575 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7576 break; 7577 case T_FLOAT: 7578 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7579 break; 7580 case T_LONG: 7581 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7582 break; 7583 case T_DOUBLE: 7584 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7585 break; 7586 default: 7587 ShouldNotReachHere(); 7588 } 7589 %} 7590 ins_pipe( pipe_slow ); 7591 %} 7592 7593 instruct vcastLtoBS(vec dst, vec src) %{ 7594 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7595 UseAVX <= 2); 7596 match(Set dst (VectorCastL2X src)); 7597 format %{ "vector_cast_l2x $dst,$src" %} 7598 ins_encode %{ 7599 assert(UseAVX > 0, "required"); 7600 7601 int vlen = Matcher::vector_length_in_bytes(this, $src); 7602 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7603 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7604 : ExternalAddress(vector_int_to_short_mask()); 7605 if (vlen <= 16) { 7606 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7607 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7608 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7609 } else { 7610 assert(vlen <= 32, "required"); 7611 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7612 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7613 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7614 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7615 } 7616 if (to_elem_bt == T_BYTE) { 7617 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7618 } 7619 %} 7620 ins_pipe( pipe_slow ); 7621 %} 7622 7623 instruct vcastLtoX_evex(vec dst, vec src) %{ 7624 predicate(UseAVX > 2 || 7625 (Matcher::vector_element_basic_type(n) == T_INT || 7626 Matcher::vector_element_basic_type(n) == T_FLOAT || 7627 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7628 match(Set dst (VectorCastL2X src)); 7629 format %{ "vector_cast_l2x $dst,$src\t!" 
%}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for register-indirect addressing: stub constants live in the code
    // cache, and ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise
    // that limit, but a code cache larger than 2G is unrealistic in practice. On the flip
    // side, the cap lets us avoid allocating a temporary register, which in the limiting
    // case can prevent spilling in blocks with high register pressure.
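    // The helper below is expected to honor Java's float-to-integral cast semantics per
    // lane (NaN becomes 0, out-of-range values saturate to the target type's MIN/MAX,
    // cf. the scalar SharedRuntime::f2i); the vector_float_signflip() constant presumably
    // feeds that special-case fix-up. A rough scalar model of the per-lane behavior for
    // the int case, for illustration only (hypothetical sketch, not the actual stub code;
    // narrower element types are handled analogously):
    //
    //   jint java_f2i(jfloat f) {
    //     if (f != f)              return 0;        // NaN -> 0
    //     if (f >=  2147483648.0f) return max_jint; // saturate high
    //     if (f <= -2147483648.0f) return min_jint; // saturate low
    //     return (jint)f;                           // in range: truncate toward zero
    //   }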
7711 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7712 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7713 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7714 %} 7715 ins_pipe( pipe_slow ); 7716 %} 7717 7718 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7719 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7720 is_integral_type(Matcher::vector_element_basic_type(n))); 7721 match(Set dst (VectorCastF2X src)); 7722 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7723 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7724 ins_encode %{ 7725 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7726 if (to_elem_bt == T_LONG) { 7727 int vlen_enc = vector_length_encoding(this); 7728 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7729 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7730 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7731 } else { 7732 int vlen_enc = vector_length_encoding(this, $src); 7733 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7734 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7735 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7736 } 7737 %} 7738 ins_pipe( pipe_slow ); 7739 %} 7740 7741 instruct vcastDtoF_reg(vec dst, vec src) %{ 7742 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7743 match(Set dst (VectorCastD2X src)); 7744 format %{ "vector_cast_d2x $dst,$src\t!" %} 7745 ins_encode %{ 7746 int vlen_enc = vector_length_encoding(this, $src); 7747 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7748 %} 7749 ins_pipe( pipe_slow ); 7750 %} 7751 7752 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7753 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7754 is_integral_type(Matcher::vector_element_basic_type(n))); 7755 match(Set dst (VectorCastD2X src)); 7756 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7757 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7758 ins_encode %{ 7759 int vlen_enc = vector_length_encoding(this, $src); 7760 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7761 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7762 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7763 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7764 %} 7765 ins_pipe( pipe_slow ); 7766 %} 7767 7768 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7769 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7770 is_integral_type(Matcher::vector_element_basic_type(n))); 7771 match(Set dst (VectorCastD2X src)); 7772 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7773 format %{ "vector_cast_d2x $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7774 ins_encode %{ 7775 int vlen_enc = vector_length_encoding(this, $src); 7776 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7777 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7778 ExternalAddress(vector_float_signflip()); 7779 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7780 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7781 %} 7782 ins_pipe( pipe_slow ); 7783 %} 7784 7785 instruct vucast(vec dst, vec src) %{ 7786 match(Set dst (VectorUCastB2X src)); 7787 match(Set dst (VectorUCastS2X src)); 7788 match(Set dst (VectorUCastI2X src)); 7789 format %{ "vector_ucast $dst,$src\t!" %} 7790 ins_encode %{ 7791 assert(UseAVX > 0, "required"); 7792 7793 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7794 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7795 int vlen_enc = vector_length_encoding(this); 7796 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7797 %} 7798 ins_pipe( pipe_slow ); 7799 %} 7800 7801 #ifdef _LP64 7802 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7803 predicate(!VM_Version::supports_avx512vl() && 7804 Matcher::vector_length_in_bytes(n) < 64 && 7805 Matcher::vector_element_basic_type(n) == T_INT); 7806 match(Set dst (RoundVF src)); 7807 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7808 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7809 ins_encode %{ 7810 int vlen_enc = vector_length_encoding(this); 7811 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7812 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7813 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7814 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7815 %} 7816 ins_pipe( pipe_slow ); 7817 %} 7818 7819 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7820 predicate((VM_Version::supports_avx512vl() || 7821 Matcher::vector_length_in_bytes(n) == 64) && 7822 Matcher::vector_element_basic_type(n) == T_INT); 7823 match(Set dst (RoundVF src)); 7824 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7825 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7826 ins_encode %{ 7827 int vlen_enc = vector_length_encoding(this); 7828 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7829 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7830 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7831 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7832 %} 7833 ins_pipe( pipe_slow ); 7834 %} 7835 7836 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7837 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7838 match(Set dst (RoundVD src)); 7839 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7840 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7841 ins_encode %{ 7842 int vlen_enc = vector_length_encoding(this); 7843 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7844 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7845 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7846 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7847 %} 7848 ins_pipe( pipe_slow ); 7849 %} 7850 7851 #endif // _LP64 7852 7853 // --------------------------------- VectorMaskCmp -------------------------------------- 7854 7855 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7856 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7857 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7858 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7859 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7860 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7861 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7862 ins_encode %{ 7863 int vlen_enc = vector_length_encoding(this, $src1); 7864 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7865 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7866 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7867 } else { 7868 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7869 } 7870 %} 7871 ins_pipe( pipe_slow ); 7872 %} 7873 7874 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7875 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7876 n->bottom_type()->isa_vectmask() == nullptr && 7877 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7878 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7879 effect(TEMP ktmp); 7880 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7881 ins_encode %{ 7882 int vlen_enc = Assembler::AVX_512bit; 7883 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7884 KRegister mask = k0; // The comparison itself is not being masked. 
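    // This rule must produce an ordinary boolean vector (its type is not a TypeVectMask),
    // so the result cannot stay in a mask register: the EVEX compare below sets one bit
    // per lane in $ktmp, and the masked load from vector_all_bits_set() with zero-masking
    // expands that bit-mask so true lanes become all-ones (-1) and false lanes become 0.
    // Conceptually, per lane i:  dst[i] = ((ktmp >> i) & 1) ? ~0 : 0;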
7885 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7886 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7887 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7888 } else { 7889 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7890 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7891 } 7892 %} 7893 ins_pipe( pipe_slow ); 7894 %} 7895 7896 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7897 predicate(n->bottom_type()->isa_vectmask() && 7898 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7899 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7900 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7901 ins_encode %{ 7902 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7903 int vlen_enc = vector_length_encoding(this, $src1); 7904 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7905 KRegister mask = k0; // The comparison itself is not being masked. 7906 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7907 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7908 } else { 7909 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7910 } 7911 %} 7912 ins_pipe( pipe_slow ); 7913 %} 7914 7915 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7916 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7917 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7918 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7919 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7920 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7921 (n->in(2)->get_int() == BoolTest::eq || 7922 n->in(2)->get_int() == BoolTest::lt || 7923 n->in(2)->get_int() == BoolTest::gt)); // cond 7924 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7925 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7926 ins_encode %{ 7927 int vlen_enc = vector_length_encoding(this, $src1); 7928 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7929 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7930 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7931 %} 7932 ins_pipe( pipe_slow ); 7933 %} 7934 7935 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7936 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7937 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7938 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7939 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7940 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7941 (n->in(2)->get_int() == BoolTest::ne || 7942 n->in(2)->get_int() == BoolTest::le || 7943 n->in(2)->get_int() == BoolTest::ge)); // cond 7944 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7945 effect(TEMP dst, TEMP xtmp); 7946 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7947 ins_encode %{ 7948 int vlen_enc = vector_length_encoding(this, $src1); 7949 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7950 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7951 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7952 %} 7953 ins_pipe( pipe_slow ); 7954 %} 7955 7956 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7957 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7958 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7959 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7960 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7961 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7962 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7963 effect(TEMP dst, TEMP xtmp); 7964 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7965 ins_encode %{ 7966 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7967 int vlen_enc = vector_length_encoding(this, $src1); 7968 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7969 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7970 7971 if (vlen_enc == Assembler::AVX_128bit) { 7972 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7973 } else { 7974 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7975 } 7976 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7977 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7978 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7979 %} 7980 ins_pipe( pipe_slow ); 7981 %} 7982 7983 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7984 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7985 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7986 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7987 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7988 effect(TEMP ktmp); 7989 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7990 ins_encode %{ 7991 assert(UseAVX > 2, "required"); 7992 7993 int vlen_enc = vector_length_encoding(this, $src1); 7994 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7995 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7996 KRegister mask = k0; // The comparison itself is not being masked. 
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}


instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is performed according to src1's element type; the result is a mask in $dst.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t!
using $vtmp as TEMP" %} 8085 ins_encode %{ 8086 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8087 8088 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8089 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8090 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8091 %} 8092 ins_pipe( pipe_slow ); 8093 %} 8094 8095 #ifdef _LP64 8096 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8097 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8098 match(Set dst (ExtractL src idx)); 8099 format %{ "extractL $dst,$src,$idx\t!" %} 8100 ins_encode %{ 8101 assert(UseSSE >= 4, "required"); 8102 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8103 8104 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8105 %} 8106 ins_pipe( pipe_slow ); 8107 %} 8108 8109 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8110 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8111 Matcher::vector_length(n->in(1)) == 8); // src 8112 match(Set dst (ExtractL src idx)); 8113 effect(TEMP vtmp); 8114 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} 8115 ins_encode %{ 8116 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8117 8118 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8119 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 #endif 8124 8125 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8126 predicate(Matcher::vector_length(n->in(1)) <= 4); 8127 match(Set dst (ExtractF src idx)); 8128 effect(TEMP dst, TEMP vtmp); 8129 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8130 ins_encode %{ 8131 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8132 8133 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8134 %} 8135 ins_pipe( pipe_slow ); 8136 %} 8137 8138 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8139 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8140 Matcher::vector_length(n->in(1)/*src*/) == 16); 8141 match(Set dst (ExtractF src idx)); 8142 effect(TEMP vtmp); 8143 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8144 ins_encode %{ 8145 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8146 8147 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8148 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8149 %} 8150 ins_pipe( pipe_slow ); 8151 %} 8152 8153 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8154 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8155 match(Set dst (ExtractD src idx)); 8156 format %{ "extractD $dst,$src,$idx\t!" %} 8157 ins_encode %{ 8158 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8159 8160 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8161 %} 8162 ins_pipe( pipe_slow ); 8163 %} 8164 8165 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8166 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8167 Matcher::vector_length(n->in(1)) == 8); // src 8168 match(Set dst (ExtractD src idx)); 8169 effect(TEMP vtmp); 8170 format %{ "vextractD $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8171 ins_encode %{ 8172 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8173 8174 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8175 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8176 %} 8177 ins_pipe( pipe_slow ); 8178 %} 8179 8180 // --------------------------------- Vector Blend -------------------------------------- 8181 8182 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8183 predicate(UseAVX == 0); 8184 match(Set dst (VectorBlend (Binary dst src) mask)); 8185 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 8186 effect(TEMP tmp); 8187 ins_encode %{ 8188 assert(UseSSE >= 4, "required"); 8189 8190 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8191 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8192 } 8193 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8194 %} 8195 ins_pipe( pipe_slow ); 8196 %} 8197 8198 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8199 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8200 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8201 Matcher::vector_length_in_bytes(n) <= 32 && 8202 is_integral_type(Matcher::vector_element_basic_type(n))); 8203 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8204 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8205 ins_encode %{ 8206 int vlen_enc = vector_length_encoding(this); 8207 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8208 %} 8209 ins_pipe( pipe_slow ); 8210 %} 8211 8212 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8213 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8214 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8215 Matcher::vector_length_in_bytes(n) <= 32 && 8216 !is_integral_type(Matcher::vector_element_basic_type(n))); 8217 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8218 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8219 ins_encode %{ 8220 int vlen_enc = vector_length_encoding(this); 8221 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8222 %} 8223 ins_pipe( pipe_slow ); 8224 %} 8225 8226 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8227 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8228 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8229 Matcher::vector_length_in_bytes(n) <= 32); 8230 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8231 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8232 effect(TEMP vtmp, TEMP dst); 8233 ins_encode %{ 8234 int vlen_enc = vector_length_encoding(this); 8235 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8236 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8237 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8238 %} 8239 ins_pipe( pipe_slow ); 8240 %} 8241 8242 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8243 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8244 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8245 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8246 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8247 effect(TEMP ktmp); 8248 ins_encode %{ 8249 int vlen_enc = Assembler::AVX_512bit; 8250 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8251 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8252 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8253 %} 8254 ins_pipe( pipe_slow ); 8255 %} 8256 8257 8258 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8259 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8260 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8261 VM_Version::supports_avx512bw())); 8262 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8263 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8264 ins_encode %{ 8265 int vlen_enc = vector_length_encoding(this); 8266 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8267 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8268 %} 8269 ins_pipe( pipe_slow ); 8270 %} 8271 8272 // --------------------------------- ABS -------------------------------------- 8273 // a = |a| 8274 instruct vabsB_reg(vec dst, vec src) %{ 8275 match(Set dst (AbsVB src)); 8276 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8277 ins_encode %{ 8278 uint vlen = Matcher::vector_length(this); 8279 if (vlen <= 16) { 8280 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8281 } else { 8282 int vlen_enc = vector_length_encoding(this); 8283 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8284 } 8285 %} 8286 ins_pipe( pipe_slow ); 8287 %} 8288 8289 instruct vabsS_reg(vec dst, vec src) %{ 8290 match(Set dst (AbsVS src)); 8291 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8292 ins_encode %{ 8293 uint vlen = Matcher::vector_length(this); 8294 if (vlen <= 8) { 8295 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8296 } else { 8297 int vlen_enc = vector_length_encoding(this); 8298 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8299 } 8300 %} 8301 ins_pipe( pipe_slow ); 8302 %} 8303 8304 instruct vabsI_reg(vec dst, vec src) %{ 8305 match(Set dst (AbsVI src)); 8306 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8307 ins_encode %{ 8308 uint vlen = Matcher::vector_length(this); 8309 if (vlen <= 4) { 8310 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8311 } else { 8312 int vlen_enc = vector_length_encoding(this); 8313 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8314 } 8315 %} 8316 ins_pipe( pipe_slow ); 8317 %} 8318 8319 instruct vabsL_reg(vec dst, vec src) %{ 8320 match(Set dst (AbsVL src)); 8321 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8322 ins_encode %{ 8323 assert(UseAVX > 2, "required"); 8324 int vlen_enc = vector_length_encoding(this); 8325 if (!VM_Version::supports_avx512vl()) { 8326 vlen_enc = Assembler::AVX_512bit; 8327 } 8328 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8329 %} 8330 ins_pipe( pipe_slow ); 8331 %} 8332 8333 // --------------------------------- ABSNEG -------------------------------------- 8334 8335 instruct vabsnegF(vec dst, vec src) %{ 8336 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8337 match(Set dst (AbsVF src)); 8338 match(Set dst (NegVF src)); 8339 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8340 ins_cost(150); 8341 ins_encode %{ 8342 int opcode = 
this->ideal_Opcode(); 8343 int vlen = Matcher::vector_length(this); 8344 if (vlen == 2) { 8345 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8346 } else { 8347 assert(vlen == 8 || vlen == 16, "required"); 8348 int vlen_enc = vector_length_encoding(this); 8349 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8350 } 8351 %} 8352 ins_pipe( pipe_slow ); 8353 %} 8354 8355 instruct vabsneg4F(vec dst) %{ 8356 predicate(Matcher::vector_length(n) == 4); 8357 match(Set dst (AbsVF dst)); 8358 match(Set dst (NegVF dst)); 8359 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8360 ins_cost(150); 8361 ins_encode %{ 8362 int opcode = this->ideal_Opcode(); 8363 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8364 %} 8365 ins_pipe( pipe_slow ); 8366 %} 8367 8368 instruct vabsnegD(vec dst, vec src) %{ 8369 match(Set dst (AbsVD src)); 8370 match(Set dst (NegVD src)); 8371 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8372 ins_encode %{ 8373 int opcode = this->ideal_Opcode(); 8374 uint vlen = Matcher::vector_length(this); 8375 if (vlen == 2) { 8376 assert(UseSSE >= 2, "required"); 8377 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8378 } else { 8379 int vlen_enc = vector_length_encoding(this); 8380 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8381 } 8382 %} 8383 ins_pipe( pipe_slow ); 8384 %} 8385 8386 //------------------------------------- VectorTest -------------------------------------------- 8387 8388 #ifdef _LP64 8389 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8390 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8391 match(Set cr (VectorTest src1 src2)); 8392 effect(TEMP vtmp); 8393 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8394 ins_encode %{ 8395 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8396 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8397 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8398 %} 8399 ins_pipe( pipe_slow ); 8400 %} 8401 8402 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8403 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8404 match(Set cr (VectorTest src1 src2)); 8405 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8406 ins_encode %{ 8407 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8408 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8409 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8410 %} 8411 ins_pipe( pipe_slow ); 8412 %} 8413 8414 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8415 predicate((Matcher::vector_length(n->in(1)) < 8 || 8416 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8417 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8418 match(Set cr (VectorTest src1 src2)); 8419 effect(TEMP tmp); 8420 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8421 ins_encode %{ 8422 uint masklen = Matcher::vector_length(this, $src1); 8423 __ kmovwl($tmp$$Register, $src1$$KRegister); 8424 __ andl($tmp$$Register, (1 << masklen) - 1); 8425 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8426 %} 8427 ins_pipe( pipe_slow ); 8428 %} 8429 8430 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8431 predicate((Matcher::vector_length(n->in(1)) < 8 || 8432 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8433 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8434 match(Set cr (VectorTest src1 src2)); 8435 effect(TEMP tmp); 8436 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8437 ins_encode %{ 8438 uint masklen = Matcher::vector_length(this, $src1); 8439 __ kmovwl($tmp$$Register, $src1$$KRegister); 8440 __ andl($tmp$$Register, (1 << masklen) - 1); 8441 %} 8442 ins_pipe( pipe_slow ); 8443 %} 8444 8445 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8446 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8447 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8448 match(Set cr (VectorTest src1 src2)); 8449 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8450 ins_encode %{ 8451 uint masklen = Matcher::vector_length(this, $src1); 8452 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8453 %} 8454 ins_pipe( pipe_slow ); 8455 %} 8456 #endif 8457 8458 //------------------------------------- LoadMask -------------------------------------------- 8459 8460 instruct loadMask(legVec dst, legVec src) %{ 8461 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8462 match(Set dst (VectorLoadMask src)); 8463 effect(TEMP dst); 8464 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8465 ins_encode %{ 8466 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8467 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8468 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8469 %} 8470 ins_pipe( pipe_slow ); 8471 %} 8472 8473 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8474 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8475 match(Set dst (VectorLoadMask src)); 8476 effect(TEMP xtmp); 8477 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8478 ins_encode %{ 8479 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8480 true, Assembler::AVX_512bit); 8481 %} 8482 ins_pipe( pipe_slow ); 8483 %} 8484 8485 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8486 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8487 match(Set dst (VectorLoadMask src)); 8488 effect(TEMP xtmp); 8489 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8490 ins_encode %{ 8491 int vlen_enc = vector_length_encoding(in(1)); 8492 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8493 false, vlen_enc); 8494 %} 8495 ins_pipe( pipe_slow ); 8496 %} 8497 8498 //------------------------------------- StoreMask -------------------------------------------- 8499 8500 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8501 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8502 match(Set dst (VectorStoreMask src size)); 8503 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8504 ins_encode %{ 8505 int vlen = Matcher::vector_length(this); 8506 if (vlen <= 16 && UseAVX <= 2) { 8507 assert(UseSSE >= 3, "required"); 8508 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8509 } else { 8510 assert(UseAVX > 0, "required"); 8511 int src_vlen_enc = vector_length_encoding(this, $src); 8512 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8513 } 8514 %} 8515 ins_pipe( pipe_slow ); 8516 %} 8517 8518 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8519 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8520 match(Set dst (VectorStoreMask src size)); 8521 effect(TEMP_DEF dst, TEMP xtmp); 8522 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8523 ins_encode %{ 8524 int vlen_enc = Assembler::AVX_128bit; 8525 int vlen = Matcher::vector_length(this); 8526 if (vlen <= 8) { 8527 assert(UseSSE >= 3, "required"); 8528 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8529 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8530 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8531 } else { 8532 assert(UseAVX > 0, "required"); 8533 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8534 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8535 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8536 } 8537 %} 8538 ins_pipe( pipe_slow ); 8539 %} 8540 8541 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8542 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8543 match(Set dst (VectorStoreMask src size)); 8544 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8545 effect(TEMP_DEF dst, TEMP xtmp); 8546 ins_encode %{ 8547 int vlen_enc = Assembler::AVX_128bit; 8548 int vlen = Matcher::vector_length(this); 8549 if (vlen <= 4) { 8550 assert(UseSSE >= 3, "required"); 8551 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8552 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8553 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8554 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8555 } else { 8556 assert(UseAVX > 0, "required"); 8557 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8558 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8559 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8560 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8561 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8562 } 8563 %} 8564 ins_pipe( pipe_slow ); 8565 %} 8566 8567 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8568 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8569 match(Set dst (VectorStoreMask src size)); 8570 effect(TEMP_DEF dst, TEMP xtmp); 8571 format %{ "vector_store_mask $dst, $src \t! 
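// Note on the VectorStoreMask rules above: the vector-boolean input encodes each lane as 0
// (false) or -1 (all bits set, true), and the result must be one byte per lane holding 0 or 1.
// The pabs*/pack* sequences implement, per lane, the equivalent of this hedged sketch
// (exposition only):
//   int8_t store_mask_lane(int64_t lane) { return lane ? 1 : 0; }   // packed-abs maps -1 to 1, 0 stays 0
// The pack instructions then narrow the wider short/int lanes down to bytes, with the zeroed
// xtmp register supplying the unused upper halves of each pack step.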
elem size is $size byte[s]" %} 8572 ins_encode %{ 8573 assert(UseSSE >= 3, "required"); 8574 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8575 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8576 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8577 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8578 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8579 %} 8580 ins_pipe( pipe_slow ); 8581 %} 8582 8583 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8584 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8585 match(Set dst (VectorStoreMask src size)); 8586 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8587 effect(TEMP_DEF dst, TEMP vtmp); 8588 ins_encode %{ 8589 int vlen_enc = Assembler::AVX_128bit; 8590 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8591 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8592 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8593 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8594 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8595 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8596 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8597 %} 8598 ins_pipe( pipe_slow ); 8599 %} 8600 8601 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8602 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8603 match(Set dst (VectorStoreMask src size)); 8604 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8605 ins_encode %{ 8606 int src_vlen_enc = vector_length_encoding(this, $src); 8607 int dst_vlen_enc = vector_length_encoding(this); 8608 if (!VM_Version::supports_avx512vl()) { 8609 src_vlen_enc = Assembler::AVX_512bit; 8610 } 8611 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8612 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8618 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8619 match(Set dst (VectorStoreMask src size)); 8620 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8621 ins_encode %{ 8622 int src_vlen_enc = vector_length_encoding(this, $src); 8623 int dst_vlen_enc = vector_length_encoding(this); 8624 if (!VM_Version::supports_avx512vl()) { 8625 src_vlen_enc = Assembler::AVX_512bit; 8626 } 8627 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8628 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8629 %} 8630 ins_pipe( pipe_slow ); 8631 %} 8632 8633 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8634 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8635 match(Set dst (VectorStoreMask mask size)); 8636 effect(TEMP_DEF dst); 8637 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8638 ins_encode %{ 8639 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8640 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8641 false, Assembler::AVX_512bit, noreg); 8642 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8643 %} 8644 ins_pipe( pipe_slow ); 8645 %} 8646 8647 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8648 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8649 match(Set dst (VectorStoreMask mask size)); 8650 effect(TEMP_DEF dst); 8651 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8652 ins_encode %{ 8653 int dst_vlen_enc = vector_length_encoding(this); 8654 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8655 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8656 %} 8657 ins_pipe( pipe_slow ); 8658 %} 8659 8660 instruct vmaskcast_evex(kReg dst) %{ 8661 match(Set dst (VectorMaskCast dst)); 8662 ins_cost(0); 8663 format %{ "vector_mask_cast $dst" %} 8664 ins_encode %{ 8665 // empty 8666 %} 8667 ins_pipe(empty); 8668 %} 8669 8670 instruct vmaskcast(vec dst) %{ 8671 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8672 match(Set dst (VectorMaskCast dst)); 8673 ins_cost(0); 8674 format %{ "vector_mask_cast $dst" %} 8675 ins_encode %{ 8676 // empty 8677 %} 8678 ins_pipe(empty); 8679 %} 8680 8681 instruct vmaskcast_avx(vec dst, vec src) %{ 8682 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8683 match(Set dst (VectorMaskCast src)); 8684 format %{ "vector_mask_cast $dst, $src" %} 8685 ins_encode %{ 8686 int vlen = Matcher::vector_length(this); 8687 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8688 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8689 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8690 %} 8691 ins_pipe(pipe_slow); 8692 %} 8693 8694 //-------------------------------- Load Iota Indices ---------------------------------- 8695 8696 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8697 match(Set dst (VectorLoadConst src)); 8698 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8699 ins_encode %{ 8700 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8701 BasicType bt = Matcher::vector_element_basic_type(this); 8702 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8703 %} 8704 ins_pipe( pipe_slow ); 8705 %} 8706 8707 #ifdef _LP64 8708 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8709 match(Set dst (PopulateIndex src1 src2)); 8710 effect(TEMP dst, TEMP vtmp); 8711 format %{ "vector_populate_index $dst $src1 $src2\t! 
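// Note on VectorLoadConst (iota) above and the PopulateIndex rules that follow: the iota load
// fills dst with the lane indices 0, 1, 2, ..., and PopulateIndex (with the asserted step of 1)
// produces src1 + i in lane i by broadcasting src1 and adding the iota vector. Hedged scalar
// sketch (exposition only):
//   for (int i = 0; i < vlen; i++) {
//     iota[i] = (ElemT)i;              // load_iota_indices
//     dst[i]  = (ElemT)(src1 + i);     // vpbroadcast(src1) added to iota
//   }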
using $vtmp as TEMP" %} 8712 ins_encode %{ 8713 assert($src2$$constant == 1, "required"); 8714 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8715 int vlen_enc = vector_length_encoding(this); 8716 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8717 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8718 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8719 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8720 %} 8721 ins_pipe( pipe_slow ); 8722 %} 8723 8724 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8725 match(Set dst (PopulateIndex src1 src2)); 8726 effect(TEMP dst, TEMP vtmp); 8727 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8728 ins_encode %{ 8729 assert($src2$$constant == 1, "required"); 8730 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8731 int vlen_enc = vector_length_encoding(this); 8732 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8733 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8734 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8735 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8736 %} 8737 ins_pipe( pipe_slow ); 8738 %} 8739 #endif 8740 //-------------------------------- Rearrange ---------------------------------- 8741 8742 // LoadShuffle/Rearrange for Byte 8743 8744 instruct loadShuffleB(vec dst) %{ 8745 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8746 match(Set dst (VectorLoadShuffle dst)); 8747 format %{ "vector_load_shuffle $dst, $dst" %} 8748 ins_encode %{ 8749 // empty 8750 %} 8751 ins_pipe( pipe_slow ); 8752 %} 8753 8754 instruct rearrangeB(vec dst, vec shuffle) %{ 8755 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8756 Matcher::vector_length(n) < 32); 8757 match(Set dst (VectorRearrange dst shuffle)); 8758 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8759 ins_encode %{ 8760 assert(UseSSE >= 4, "required"); 8761 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8762 %} 8763 ins_pipe( pipe_slow ); 8764 %} 8765 8766 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8767 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8768 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8769 match(Set dst (VectorRearrange src shuffle)); 8770 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8771 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8772 ins_encode %{ 8773 assert(UseAVX >= 2, "required"); 8774 // Swap src into vtmp1 8775 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8776 // Shuffle swapped src to get entries from other 128 bit lane 8777 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8778 // Shuffle original src to get entries from self 128 bit lane 8779 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8780 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8781 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8782 // Perform the blend 8783 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8784 %} 8785 ins_pipe( pipe_slow ); 8786 %} 8787 8788 8789 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8790 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8791 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8792 match(Set dst (VectorRearrange src shuffle)); 8793 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8794 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8795 ins_encode %{ 8796 int vlen_enc = vector_length_encoding(this); 8797 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8798 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8799 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8800 %} 8801 ins_pipe( pipe_slow ); 8802 %} 8803 8804 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8805 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8806 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8807 match(Set dst (VectorRearrange src shuffle)); 8808 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8809 ins_encode %{ 8810 int vlen_enc = vector_length_encoding(this); 8811 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8812 %} 8813 ins_pipe( pipe_slow ); 8814 %} 8815 8816 // LoadShuffle/Rearrange for Short 8817 8818 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8819 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8820 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8821 match(Set dst (VectorLoadShuffle src)); 8822 effect(TEMP dst, TEMP vtmp); 8823 format %{ "vector_load_shuffle $dst, $src\t! 
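// Note on the 256-bit byte rearrange above: vpshufb only permutes within each 128-bit lane, so
// the AVX2 rule shuffles both the original src and a lane-swapped copy, then vpblendvb picks,
// per byte, whichever result came from the lane the index actually refers to (the vpaddb against
// the shuffle-mask constant is arranged so that indices into the other lane set the blend byte's
// sign bit, which is what vpblendvb keys on). Hedged sketch of the intended semantics
// (exposition only):
//   for (int i = 0; i < 32; i++) dst[i] = src[shuffle[i] & 31];
// With AVX512_VBMI, a single vpermb performs exactly this cross-lane indexing.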
using $vtmp as TEMP" %} 8824 ins_encode %{ 8825 // Create a byte shuffle mask from short shuffle mask 8826 // only byte shuffle instruction available on these platforms 8827 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8828 if (UseAVX == 0) { 8829 assert(vlen_in_bytes <= 16, "required"); 8830 // Multiply each shuffle by two to get byte index 8831 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8832 __ psllw($vtmp$$XMMRegister, 1); 8833 8834 // Duplicate to create 2 copies of byte index 8835 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8836 __ psllw($dst$$XMMRegister, 8); 8837 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8838 8839 // Add one to get alternate byte index 8840 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8841 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8842 } else { 8843 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8844 int vlen_enc = vector_length_encoding(this); 8845 // Multiply each shuffle by two to get byte index 8846 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8847 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8848 8849 // Duplicate to create 2 copies of byte index 8850 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8851 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8852 8853 // Add one to get alternate byte index 8854 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8855 } 8856 %} 8857 ins_pipe( pipe_slow ); 8858 %} 8859 8860 instruct rearrangeS(vec dst, vec shuffle) %{ 8861 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8862 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8863 match(Set dst (VectorRearrange dst shuffle)); 8864 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8865 ins_encode %{ 8866 assert(UseSSE >= 4, "required"); 8867 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871 8872 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8873 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8874 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8875 match(Set dst (VectorRearrange src shuffle)); 8876 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8877 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
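// Note on loadShuffleS above: without AVX512BW there is no 16-bit lane permute, so each short
// shuffle index i is expanded into the byte-index pair (2*i, 2*i + 1) that pshufb/vpshufb can
// consume. Hedged sketch of the expansion (exposition only):
//   for (int i = 0; i < num_shorts; i++) {
//     byte_mask[2*i]     = (int8_t)(2 * shuffle[i]);       // the shift left by 1 (multiply by 2)
//     byte_mask[2*i + 1] = (int8_t)(2 * shuffle[i] + 1);   // the +1 comes from the shufflemask constant
//   }
// For example, a short shuffle of [2, 0] becomes the byte mask [4, 5, 0, 1].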
using $vtmp1, $vtmp2 as TEMP" %} 8878 ins_encode %{ 8879 assert(UseAVX >= 2, "required"); 8880 // Swap src into vtmp1 8881 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8882 // Shuffle swapped src to get entries from other 128 bit lane 8883 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8884 // Shuffle original src to get entries from self 128 bit lane 8885 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8886 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8887 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8888 // Perform the blend 8889 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8890 %} 8891 ins_pipe( pipe_slow ); 8892 %} 8893 8894 instruct loadShuffleS_evex(vec dst, vec src) %{ 8895 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8896 VM_Version::supports_avx512bw()); 8897 match(Set dst (VectorLoadShuffle src)); 8898 format %{ "vector_load_shuffle $dst, $src" %} 8899 ins_encode %{ 8900 int vlen_enc = vector_length_encoding(this); 8901 if (!VM_Version::supports_avx512vl()) { 8902 vlen_enc = Assembler::AVX_512bit; 8903 } 8904 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8905 %} 8906 ins_pipe( pipe_slow ); 8907 %} 8908 8909 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8910 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8911 VM_Version::supports_avx512bw()); 8912 match(Set dst (VectorRearrange src shuffle)); 8913 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8914 ins_encode %{ 8915 int vlen_enc = vector_length_encoding(this); 8916 if (!VM_Version::supports_avx512vl()) { 8917 vlen_enc = Assembler::AVX_512bit; 8918 } 8919 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8920 %} 8921 ins_pipe( pipe_slow ); 8922 %} 8923 8924 // LoadShuffle/Rearrange for Integer and Float 8925 8926 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8927 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8928 Matcher::vector_length(n) == 4 && UseAVX == 0); 8929 match(Set dst (VectorLoadShuffle src)); 8930 effect(TEMP dst, TEMP vtmp); 8931 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8932 ins_encode %{ 8933 assert(UseSSE >= 4, "required"); 8934 8935 // Create a byte shuffle mask from int shuffle mask 8936 // only byte shuffle instruction available on these platforms 8937 8938 // Duplicate and multiply each shuffle by 4 8939 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8940 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8941 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8942 __ psllw($vtmp$$XMMRegister, 2); 8943 8944 // Duplicate again to create 4 copies of byte index 8945 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8946 __ psllw($dst$$XMMRegister, 8); 8947 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8948 8949 // Add 3,2,1,0 to get alternate byte index 8950 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8951 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8952 %} 8953 ins_pipe( pipe_slow ); 8954 %} 8955 8956 instruct rearrangeI(vec dst, vec shuffle) %{ 8957 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8958 UseAVX == 0); 8959 match(Set dst (VectorRearrange dst shuffle)); 8960 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8961 ins_encode %{ 8962 assert(UseSSE >= 4, "required"); 8963 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8964 %} 8965 ins_pipe( pipe_slow ); 8966 %} 8967 8968 instruct loadShuffleI_avx(vec dst, vec src) %{ 8969 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8970 UseAVX > 0); 8971 match(Set dst (VectorLoadShuffle src)); 8972 format %{ "vector_load_shuffle $dst, $src" %} 8973 ins_encode %{ 8974 int vlen_enc = vector_length_encoding(this); 8975 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8976 %} 8977 ins_pipe( pipe_slow ); 8978 %} 8979 8980 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8981 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8982 UseAVX > 0); 8983 match(Set dst (VectorRearrange src shuffle)); 8984 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8985 ins_encode %{ 8986 int vlen_enc = vector_length_encoding(this); 8987 BasicType bt = Matcher::vector_element_basic_type(this); 8988 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8989 %} 8990 ins_pipe( pipe_slow ); 8991 %} 8992 8993 // LoadShuffle/Rearrange for Long and Double 8994 8995 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8996 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8997 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8998 match(Set dst (VectorLoadShuffle src)); 8999 effect(TEMP dst, TEMP vtmp); 9000 format %{ "vector_load_shuffle $dst, $src\t! 
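// Note on loadShuffleI above: on SSE-only hardware each int shuffle index i is expanded into the
// four byte indices 4*i .. 4*i+3 so that pshufb can move whole 32-bit lanes. Hedged sketch
// (exposition only):
//   for (int i = 0; i < num_ints; i++)
//     for (int j = 0; j < 4; j++)
//       byte_mask[4*i + j] = (int8_t)(4 * shuffle[i] + j);   // shift by 2, duplicate, then add 0..3
// With AVX the vpmovzxbd widening in loadShuffleI_avx plus a dword permute in rearrangeI_avx
// makes this byte-level expansion unnecessary.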
using $vtmp as TEMP" %} 9001 ins_encode %{ 9002 assert(UseAVX >= 2, "required"); 9003 9004 int vlen_enc = vector_length_encoding(this); 9005 // Create a double word shuffle mask from long shuffle mask 9006 // only double word shuffle instruction available on these platforms 9007 9008 // Multiply each shuffle by two to get double word index 9009 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 9010 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 9011 9012 // Duplicate each double word shuffle 9013 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9014 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9015 9016 // Add one to get alternate double word index 9017 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9018 %} 9019 ins_pipe( pipe_slow ); 9020 %} 9021 9022 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9023 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9024 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9025 match(Set dst (VectorRearrange src shuffle)); 9026 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9027 ins_encode %{ 9028 assert(UseAVX >= 2, "required"); 9029 9030 int vlen_enc = vector_length_encoding(this); 9031 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9032 %} 9033 ins_pipe( pipe_slow ); 9034 %} 9035 9036 instruct loadShuffleL_evex(vec dst, vec src) %{ 9037 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9038 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9039 match(Set dst (VectorLoadShuffle src)); 9040 format %{ "vector_load_shuffle $dst, $src" %} 9041 ins_encode %{ 9042 assert(UseAVX > 2, "required"); 9043 9044 int vlen_enc = vector_length_encoding(this); 9045 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9046 %} 9047 ins_pipe( pipe_slow ); 9048 %} 9049 9050 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9051 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9052 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9053 match(Set dst (VectorRearrange src shuffle)); 9054 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9055 ins_encode %{ 9056 assert(UseAVX > 2, "required"); 9057 9058 int vlen_enc = vector_length_encoding(this); 9059 if (vlen_enc == Assembler::AVX_128bit) { 9060 vlen_enc = Assembler::AVX_256bit; 9061 } 9062 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9063 %} 9064 ins_pipe( pipe_slow ); 9065 %} 9066 9067 // --------------------------------- FMA -------------------------------------- 9068 // a * b + c 9069 9070 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9071 match(Set c (FmaVF c (Binary a b))); 9072 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9073 ins_cost(150); 9074 ins_encode %{ 9075 assert(UseFMA, "not enabled"); 9076 int vlen_enc = vector_length_encoding(this); 9077 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9078 %} 9079 ins_pipe( pipe_slow ); 9080 %} 9081 9082 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9083 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9084 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9085 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9086 ins_cost(150); 9087 ins_encode %{ 9088 assert(UseFMA, "not 
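// Note on the FMA rules above: FmaVF/FmaVD compute a * b + c per lane with a single rounding
// step (fused), which is why the match is guarded by UseFMA rather than being split into a
// separate multiply and add. Hedged scalar sketch (exposition only):
//   // #include <cmath>
//   dst[i] = std::fma(a[i], b[i], c[i]);   // one rounding, unlike (a[i] * b[i]) + c[i]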
enabled"); 9089 int vlen_enc = vector_length_encoding(this); 9090 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9091 %} 9092 ins_pipe( pipe_slow ); 9093 %} 9094 9095 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9096 match(Set c (FmaVD c (Binary a b))); 9097 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9098 ins_cost(150); 9099 ins_encode %{ 9100 assert(UseFMA, "not enabled"); 9101 int vlen_enc = vector_length_encoding(this); 9102 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9103 %} 9104 ins_pipe( pipe_slow ); 9105 %} 9106 9107 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9108 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9109 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9110 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9111 ins_cost(150); 9112 ins_encode %{ 9113 assert(UseFMA, "not enabled"); 9114 int vlen_enc = vector_length_encoding(this); 9115 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9116 %} 9117 ins_pipe( pipe_slow ); 9118 %} 9119 9120 // --------------------------------- Vector Multiply Add -------------------------------------- 9121 9122 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9123 predicate(UseAVX == 0); 9124 match(Set dst (MulAddVS2VI dst src1)); 9125 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9126 ins_encode %{ 9127 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9128 %} 9129 ins_pipe( pipe_slow ); 9130 %} 9131 9132 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9133 predicate(UseAVX > 0); 9134 match(Set dst (MulAddVS2VI src1 src2)); 9135 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9136 ins_encode %{ 9137 int vlen_enc = vector_length_encoding(this); 9138 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9139 %} 9140 ins_pipe( pipe_slow ); 9141 %} 9142 9143 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9144 9145 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9146 predicate(VM_Version::supports_avx512_vnni()); 9147 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9148 format %{ "evpdpwssd $dst,$src1,$src2\t! 
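// Note on MulAddVS2VI above and the VNNI form that follows: pmaddwd multiplies adjacent signed
// 16-bit lanes and sums each pair into a 32-bit lane. Hedged scalar sketch (exposition only):
//   for (int i = 0; i < num_ints; i++)
//     dst[i] = (int32_t)src1[2*i] * src2[2*i] + (int32_t)src1[2*i + 1] * src2[2*i + 1];
// The AVX512_VNNI rule also folds the follow-on AddVI: evpdpwssd accumulates the pair sums into
// the existing dst lane (dst[i] += pair_sum), hence the MulAddVS2VI-plus-AddVI match.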
muladdadd packedStoI" %} 9149 ins_encode %{ 9150 assert(UseAVX > 2, "required"); 9151 int vlen_enc = vector_length_encoding(this); 9152 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9153 %} 9154 ins_pipe( pipe_slow ); 9155 ins_cost(10); 9156 %} 9157 9158 // --------------------------------- PopCount -------------------------------------- 9159 9160 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9161 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9162 match(Set dst (PopCountVI src)); 9163 match(Set dst (PopCountVL src)); 9164 format %{ "vector_popcount_integral $dst, $src" %} 9165 ins_encode %{ 9166 int opcode = this->ideal_Opcode(); 9167 int vlen_enc = vector_length_encoding(this, $src); 9168 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9169 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9170 %} 9171 ins_pipe( pipe_slow ); 9172 %} 9173 9174 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9175 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9176 match(Set dst (PopCountVI src mask)); 9177 match(Set dst (PopCountVL src mask)); 9178 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9179 ins_encode %{ 9180 int vlen_enc = vector_length_encoding(this, $src); 9181 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9182 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9183 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9184 %} 9185 ins_pipe( pipe_slow ); 9186 %} 9187 9188 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9189 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9190 match(Set dst (PopCountVI src)); 9191 match(Set dst (PopCountVL src)); 9192 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9193 format %{ "vector_popcount_integral $dst, $src\t! 
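// Note on the PopCountVI/PopCountVL rules above: the result is a per-lane bit count. Hedged
// scalar sketch (exposition only):
//   for (int i = 0; i < vlen; i++) dst[i] = __builtin_popcountll((uint64_t)src[i]);
// The EVEX path can use a native vector popcount where the CPU-feature predicate allows it; the
// AVX fallback synthesizes the count, which is why it needs the extra temporaries, and the
// masked variant merges the counts into a copy of src under the k-register mask.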
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9194 ins_encode %{ 9195 int opcode = this->ideal_Opcode(); 9196 int vlen_enc = vector_length_encoding(this, $src); 9197 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9198 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9199 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9205 9206 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9207 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9208 Matcher::vector_length_in_bytes(n->in(1)))); 9209 match(Set dst (CountTrailingZerosV src)); 9210 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9211 ins_cost(400); 9212 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9213 ins_encode %{ 9214 int vlen_enc = vector_length_encoding(this, $src); 9215 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9216 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9217 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9218 %} 9219 ins_pipe( pipe_slow ); 9220 %} 9221 9222 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9223 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9224 VM_Version::supports_avx512cd() && 9225 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9226 match(Set dst (CountTrailingZerosV src)); 9227 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9228 ins_cost(400); 9229 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9230 ins_encode %{ 9231 int vlen_enc = vector_length_encoding(this, $src); 9232 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9233 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9234 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9235 %} 9236 ins_pipe( pipe_slow ); 9237 %} 9238 9239 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9240 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9241 match(Set dst (CountTrailingZerosV src)); 9242 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9243 ins_cost(400); 9244 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9245 ins_encode %{ 9246 int vlen_enc = vector_length_encoding(this, $src); 9247 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9248 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9249 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9250 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9256 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9257 match(Set dst (CountTrailingZerosV src)); 9258 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
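// Note on the CountTrailingZerosV rules in this group: the result is a per-lane count of
// trailing zero bits, with a zero input yielding the full lane width. Hedged scalar sketch
// (exposition only):
//   for (int i = 0; i < vlen; i++)
//     dst[i] = (src[i] == 0) ? 8 * (int)sizeof(src[i]) : __builtin_ctzll((uint64_t)src[i]);
// A common vector lowering, which these helpers appear to follow, isolates the lowest set bit
// (x & -x) and derives the count from a leading-zero count or popcount of that single-bit value;
// the exact sequence depends on the element type and the available AVX-512 features.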
9259 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9260 ins_encode %{ 9261 int vlen_enc = vector_length_encoding(this, $src); 9262 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9263 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9264 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9265 %} 9266 ins_pipe( pipe_slow ); 9267 %} 9268 9269 9270 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9271 9272 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9273 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9274 effect(TEMP dst); 9275 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9276 ins_encode %{ 9277 int vector_len = vector_length_encoding(this); 9278 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9279 %} 9280 ins_pipe( pipe_slow ); 9281 %} 9282 9283 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9284 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9285 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9286 effect(TEMP dst); 9287 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9288 ins_encode %{ 9289 int vector_len = vector_length_encoding(this); 9290 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9291 %} 9292 ins_pipe( pipe_slow ); 9293 %} 9294 9295 // --------------------------------- Rotation Operations ---------------------------------- 9296 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9297 match(Set dst (RotateLeftV src shift)); 9298 match(Set dst (RotateRightV src shift)); 9299 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9300 ins_encode %{ 9301 int opcode = this->ideal_Opcode(); 9302 int vector_len = vector_length_encoding(this); 9303 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9304 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9305 %} 9306 ins_pipe( pipe_slow ); 9307 %} 9308 9309 instruct vprorate(vec dst, vec src, vec shift) %{ 9310 match(Set dst (RotateLeftV src shift)); 9311 match(Set dst (RotateRightV src shift)); 9312 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9313 ins_encode %{ 9314 int opcode = this->ideal_Opcode(); 9315 int vector_len = vector_length_encoding(this); 9316 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9317 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9318 %} 9319 ins_pipe( pipe_slow ); 9320 %} 9321 9322 // ---------------------------------- Masked Operations ------------------------------------ 9323 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9324 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9325 match(Set dst (LoadVectorMasked mem mask)); 9326 format %{ "vector_masked_load $dst, $mem, $mask \t! 
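// Note on the MacroLogicV rules above: the 8-bit immediate of vpternlogd is a truth table. For
// each result bit, the three corresponding source bits (a from dst, b from src2, c from src3)
// select one of the eight table entries. Hedged sketch of one bit position (exposition only):
//   int sel = (a << 2) | (b << 1) | c;     // a, b, c are single bits
//   int r   = (func >> sel) & 1;           // func 0x96 gives a ^ b ^ c, func 0xE8 gives majority(a, b, c)
// C2's macro-logic optimization is what folds chains of bitwise ops into a single func byte;
// these rules only have to emit the instruction.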
vector masked copy" %} 9327 ins_encode %{ 9328 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9329 int vlen_enc = vector_length_encoding(this); 9330 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9331 %} 9332 ins_pipe( pipe_slow ); 9333 %} 9334 9335 9336 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9337 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9338 match(Set dst (LoadVectorMasked mem mask)); 9339 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9340 ins_encode %{ 9341 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9342 int vector_len = vector_length_encoding(this); 9343 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9344 %} 9345 ins_pipe( pipe_slow ); 9346 %} 9347 9348 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9349 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9350 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9351 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9352 ins_encode %{ 9353 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9354 int vlen_enc = vector_length_encoding(src_node); 9355 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9356 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9357 %} 9358 ins_pipe( pipe_slow ); 9359 %} 9360 9361 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9362 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9363 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9364 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9365 ins_encode %{ 9366 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9367 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9368 int vlen_enc = vector_length_encoding(src_node); 9369 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9370 %} 9371 ins_pipe( pipe_slow ); 9372 %} 9373 9374 #ifdef _LP64 9375 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9376 match(Set addr (VerifyVectorAlignment addr mask)); 9377 effect(KILL cr); 9378 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9379 ins_encode %{ 9380 Label Lskip; 9381 // check if masked bits of addr are zero 9382 __ testq($addr$$Register, $mask$$constant); 9383 __ jccb(Assembler::equal, Lskip); 9384 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9385 __ bind(Lskip); 9386 %} 9387 ins_pipe(pipe_slow); 9388 %} 9389 9390 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9391 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9392 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9393 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
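// Note on the masked load/store rules above: only lanes whose mask bit (or mask lane sign bit)
// is set touch memory; on the load side the unselected destination lanes are zeroed rather than
// left undefined, and on the store side the unselected memory locations are left untouched.
// Hedged scalar sketch (exposition only):
//   for (int i = 0; i < vlen; i++) {
//     if (is_load)          dst[i] = mask_bit(i) ? mem[i] : 0;   // vmaskmov / evmovdqu with k-mask
//     else if (mask_bit(i)) mem[i] = src[i];
//   }
// The AVX form keeps the mask in an ordinary vector and tests each lane's sign bit; the EVEX
// form uses a k-register, which is what the isa_vectmask() predicates distinguish.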
vector mask comparison" %} 9394 ins_encode %{ 9395 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9396 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9397 9398 Label DONE; 9399 int vlen_enc = vector_length_encoding(this, $src1); 9400 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9401 9402 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9403 __ mov64($dst$$Register, -1L); 9404 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9405 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9406 __ jccb(Assembler::carrySet, DONE); 9407 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9408 __ notq($dst$$Register); 9409 __ tzcntq($dst$$Register, $dst$$Register); 9410 __ bind(DONE); 9411 %} 9412 ins_pipe( pipe_slow ); 9413 %} 9414 9415 9416 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9417 match(Set dst (VectorMaskGen len)); 9418 effect(TEMP temp, KILL cr); 9419 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9420 ins_encode %{ 9421 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9422 %} 9423 ins_pipe( pipe_slow ); 9424 %} 9425 9426 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9427 match(Set dst (VectorMaskGen len)); 9428 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9429 effect(TEMP temp); 9430 ins_encode %{ 9431 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9432 __ kmovql($dst$$KRegister, $temp$$Register); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9438 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9439 match(Set dst (VectorMaskToLong mask)); 9440 effect(TEMP dst, KILL cr); 9441 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9442 ins_encode %{ 9443 int opcode = this->ideal_Opcode(); 9444 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9445 int mask_len = Matcher::vector_length(this, $mask); 9446 int mask_size = mask_len * type2aelembytes(mbt); 9447 int vlen_enc = vector_length_encoding(this, $mask); 9448 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9449 $dst$$Register, mask_len, mask_size, vlen_enc); 9450 %} 9451 ins_pipe( pipe_slow ); 9452 %} 9453 9454 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9455 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9456 match(Set dst (VectorMaskToLong mask)); 9457 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9458 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9459 ins_encode %{ 9460 int opcode = this->ideal_Opcode(); 9461 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9462 int mask_len = Matcher::vector_length(this, $mask); 9463 int vlen_enc = vector_length_encoding(this, $mask); 9464 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9465 $dst$$Register, mask_len, mbt, vlen_enc); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9471 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9472 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9473 format %{ "vector_tolong_avx $dst, $mask \t! 
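// Note on the rules above: vmask_gen_imm builds a prefix mask of $len ones, i.e.
// ~0UL >> (64 - len) (for len == 5 that is 0x1F), and VectorCmpMasked returns -1 when every
// selected lane of src1 and src2 is equal, otherwise the index of the first differing lane.
// Hedged scalar sketch of the comparison (exposition only):
//   long result = -1;
//   for (int i = 0; i < vlen; i++)
//     if (mask_bit(i) && src1[i] != src2[i]) { result = i; break; }
// The emitted code reaches the same answer with mask arithmetic: kortest of (eq-mask | ~mask)
// detects the all-equal case, and tzcnt of the inverted eq-mask yields the first mismatch index
// for the prefix masks produced by VectorMaskGen.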
using $xtmp as TEMP" %} 9474 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9475 ins_encode %{ 9476 int opcode = this->ideal_Opcode(); 9477 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9478 int mask_len = Matcher::vector_length(this, $mask); 9479 int vlen_enc = vector_length_encoding(this, $mask); 9480 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9481 $dst$$Register, mask_len, mbt, vlen_enc); 9482 %} 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9487 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9488 match(Set dst (VectorMaskTrueCount mask)); 9489 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9490 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9491 ins_encode %{ 9492 int opcode = this->ideal_Opcode(); 9493 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9494 int mask_len = Matcher::vector_length(this, $mask); 9495 int mask_size = mask_len * type2aelembytes(mbt); 9496 int vlen_enc = vector_length_encoding(this, $mask); 9497 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9498 $tmp$$Register, mask_len, mask_size, vlen_enc); 9499 %} 9500 ins_pipe( pipe_slow ); 9501 %} 9502 9503 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9504 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9505 match(Set dst (VectorMaskTrueCount mask)); 9506 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9507 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9508 ins_encode %{ 9509 int opcode = this->ideal_Opcode(); 9510 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9511 int mask_len = Matcher::vector_length(this, $mask); 9512 int vlen_enc = vector_length_encoding(this, $mask); 9513 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9514 $tmp$$Register, mask_len, mbt, vlen_enc); 9515 %} 9516 ins_pipe( pipe_slow ); 9517 %} 9518 9519 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9520 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9521 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9522 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9523 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9524 ins_encode %{ 9525 int opcode = this->ideal_Opcode(); 9526 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9527 int mask_len = Matcher::vector_length(this, $mask); 9528 int vlen_enc = vector_length_encoding(this, $mask); 9529 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9530 $tmp$$Register, mask_len, mbt, vlen_enc); 9531 %} 9532 ins_pipe( pipe_slow ); 9533 %} 9534 9535 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9536 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9537 match(Set dst (VectorMaskFirstTrue mask)); 9538 match(Set dst (VectorMaskLastTrue mask)); 9539 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9540 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
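// Note on the VectorMaskToLong / VectorMaskTrueCount rules above: tolong packs the mask into an
// integer with bit i reflecting lane i, and truecount is the number of set lanes. Hedged scalar
// sketch (exposition only):
//   long packed = 0; int count = 0;
//   for (int i = 0; i < mask_len; i++)
//     if (lane_is_true(i)) { packed |= 1L << i; count++; }
// With a real k-register mask this is essentially a kmov plus, for the count, a popcount; the
// boolean-vector variants first have to condense the per-lane 0/-1 values, hence the xtmp TEMP.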
using $tmp as TEMP" %} 9541 ins_encode %{ 9542 int opcode = this->ideal_Opcode(); 9543 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9544 int mask_len = Matcher::vector_length(this, $mask); 9545 int mask_size = mask_len * type2aelembytes(mbt); 9546 int vlen_enc = vector_length_encoding(this, $mask); 9547 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9548 $tmp$$Register, mask_len, mask_size, vlen_enc); 9549 %} 9550 ins_pipe( pipe_slow ); 9551 %} 9552 9553 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9554 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9555 match(Set dst (VectorMaskFirstTrue mask)); 9556 match(Set dst (VectorMaskLastTrue mask)); 9557 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9558 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9559 ins_encode %{ 9560 int opcode = this->ideal_Opcode(); 9561 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9562 int mask_len = Matcher::vector_length(this, $mask); 9563 int vlen_enc = vector_length_encoding(this, $mask); 9564 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9565 $tmp$$Register, mask_len, mbt, vlen_enc); 9566 %} 9567 ins_pipe( pipe_slow ); 9568 %} 9569 9570 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9571 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9572 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9573 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9574 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9575 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9576 ins_encode %{ 9577 int opcode = this->ideal_Opcode(); 9578 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9579 int mask_len = Matcher::vector_length(this, $mask); 9580 int vlen_enc = vector_length_encoding(this, $mask); 9581 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9582 $tmp$$Register, mask_len, mbt, vlen_enc); 9583 %} 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 // --------------------------------- Compress/Expand Operations --------------------------- 9588 #ifdef _LP64 9589 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9590 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9591 match(Set dst (CompressV src mask)); 9592 match(Set dst (ExpandV src mask)); 9593 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9594 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9595 ins_encode %{ 9596 int opcode = this->ideal_Opcode(); 9597 int vlen_enc = vector_length_encoding(this); 9598 BasicType bt = Matcher::vector_element_basic_type(this); 9599 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9600 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 #endif 9605 9606 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9607 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9608 match(Set dst (CompressV src mask)); 9609 match(Set dst (ExpandV src mask)); 9610 format %{ "vector_compress_expand $dst, $src, $mask" %} 9611 ins_encode %{ 9612 int opcode = this->ideal_Opcode(); 9613 int vector_len = vector_length_encoding(this); 9614 BasicType bt = Matcher::vector_element_basic_type(this); 9615 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9621 match(Set dst (CompressM mask)); 9622 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9623 format %{ "mask_compress_evex $dst, $mask\t! 
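// Note on the CompressV / ExpandV rules above: compress gathers the lanes whose mask bit is set
// into consecutive low lanes of dst, and expand scatters consecutive low lanes of src back out
// to the positions selected by the mask. Hedged scalar sketch (exposition only):
//   int j = 0;
//   for (int i = 0; i < vlen; i++) {
//     if (is_compress) { if (mask_bit(i)) dst[j++] = src[i]; }
//     else             { dst[i] = mask_bit(i) ? src[j++] : 0; }
//   }
// CompressM applies the same idea to the mask itself, producing a mask whose low
// trueCount(mask) bits are set, which is what vector_mask_compress computes with the two GPRs.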
using $rtmp1 and $rtmp2 as TEMP" %} 9624 ins_encode %{ 9625 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9626 int mask_len = Matcher::vector_length(this); 9627 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9628 %} 9629 ins_pipe( pipe_slow ); 9630 %} 9631 9632 #endif // _LP64 9633 9634 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9635 9636 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9637 predicate(!VM_Version::supports_gfni()); 9638 match(Set dst (ReverseV src)); 9639 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9640 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9641 ins_encode %{ 9642 int vec_enc = vector_length_encoding(this); 9643 BasicType bt = Matcher::vector_element_basic_type(this); 9644 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9645 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9646 %} 9647 ins_pipe( pipe_slow ); 9648 %} 9649 9650 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9651 predicate(VM_Version::supports_gfni()); 9652 match(Set dst (ReverseV src)); 9653 effect(TEMP dst, TEMP xtmp); 9654 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9655 ins_encode %{ 9656 int vec_enc = vector_length_encoding(this); 9657 BasicType bt = Matcher::vector_element_basic_type(this); 9658 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9659 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9660 $xtmp$$XMMRegister); 9661 %} 9662 ins_pipe( pipe_slow ); 9663 %} 9664 9665 instruct vreverse_byte_reg(vec dst, vec src) %{ 9666 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9667 match(Set dst (ReverseBytesV src)); 9668 effect(TEMP dst); 9669 format %{ "vector_reverse_byte $dst, $src" %} 9670 ins_encode %{ 9671 int vec_enc = vector_length_encoding(this); 9672 BasicType bt = Matcher::vector_element_basic_type(this); 9673 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9674 %} 9675 ins_pipe( pipe_slow ); 9676 %} 9677 9678 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9679 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9680 match(Set dst (ReverseBytesV src)); 9681 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9682 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9683 ins_encode %{ 9684 int vec_enc = vector_length_encoding(this); 9685 BasicType bt = Matcher::vector_element_basic_type(this); 9686 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9687 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9688 %} 9689 ins_pipe( pipe_slow ); 9690 %} 9691 9692 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9693 9694 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9695 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9696 Matcher::vector_length_in_bytes(n->in(1)))); 9697 match(Set dst (CountLeadingZerosV src)); 9698 format %{ "vector_count_leading_zeros $dst, $src" %} 9699 ins_encode %{ 9700 int vlen_enc = vector_length_encoding(this, $src); 9701 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9702 __ 
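// Note on the ReverseV / ReverseBytesV rules above: ReverseV reverses the bit order within each
// lane, which decomposes into reversing the bits inside every byte and then reversing the byte
// order of the lane. With GFNI the per-byte step is a single gf2p8affineqb against the
// 0x8040201008040201 matrix loaded above; without GFNI a table-driven sequence is used instead,
// which is why that path needs the extra temporaries. Hedged sketch of the per-lane semantics
// (exposition only, 32-bit case):
//   uint32_t reverse_bits32(uint32_t x) {
//     uint32_t r = 0;
//     for (int i = 0; i < 32; i++) { r = (r << 1) | (x & 1); x >>= 1; }
//     return r;
//   }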
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9703 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9704 %} 9705 ins_pipe( pipe_slow ); 9706 %} 9707 9708 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9709 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9710 Matcher::vector_length_in_bytes(n->in(1)))); 9711 match(Set dst (CountLeadingZerosV src mask)); 9712 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9713 ins_encode %{ 9714 int vlen_enc = vector_length_encoding(this, $src); 9715 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9716 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9717 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9718 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9719 %} 9720 ins_pipe( pipe_slow ); 9721 %} 9722 9723 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9724 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9725 VM_Version::supports_avx512cd() && 9726 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9727 match(Set dst (CountLeadingZerosV src)); 9728 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9729 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9730 ins_encode %{ 9731 int vlen_enc = vector_length_encoding(this, $src); 9732 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9733 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9734 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9735 %} 9736 ins_pipe( pipe_slow ); 9737 %} 9738 9739 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9740 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9741 match(Set dst (CountLeadingZerosV src)); 9742 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9743 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9744 ins_encode %{ 9745 int vlen_enc = vector_length_encoding(this, $src); 9746 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9747 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9748 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9749 $rtmp$$Register, true, vlen_enc); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9755 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9756 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9757 match(Set dst (CountLeadingZerosV src)); 9758 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9759 format %{ "vector_count_leading_zeros $dst, $src\t! 
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
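
// Every masked rule below funnels into C2_MacroAssembler::evmasked_op, which is
// expected to pick the EVEX encoding that matches the ideal opcode and element
// type and to emit it with merge-masking (the trailing 'true'), so lanes with a
// clear mask bit keep the value already held in $dst. As a rough, assumed sketch
// (not the literal emitted sequence), a masked 256-bit AddVI would come out as
// something like:
//   __ evpaddd($dst$$XMMRegister, $mask$$KRegister, $dst$$XMMRegister,
//              $src2$$XMMRegister, /*merge*/ true, Assembler::AVX_256bit);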
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
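
// The masked shift rules below come in three flavours: an immediate count, a
// scalar count broadcast via LShiftCntV/RShiftCntV, and a per-lane variable
// count selected by is_var_shift(). The extra boolean passed to evmasked_op in
// the register variants is assumed to select the variable-shift encodings
// (e.g. VPSLLVD) instead of the uniform-count forms (e.g. VPSLLD).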
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
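
// Masked fused multiply-add. The UseFMA assert in the encodings below is
// expected to act as a sanity check only, since C2 should not create
// FmaVF/FmaVD nodes unless FMA support was detected at startup.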
instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
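
// Masked vector compare. The result is a predicate (kReg) rather than a vector,
// so the rule below dispatches on the element type of $src1, translates the
// boolean test constant into an AVX-512 comparison predicate and emits the
// matching evpcmp*/evcmpp* form under the incoming mask, selecting the unsigned
// integer variant when the test is an unsigned predicate.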
"%s", type2name(src1_elem_bt)); break; 10338 } 10339 %} 10340 ins_pipe( pipe_slow ); 10341 %} 10342 10343 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10344 predicate(Matcher::vector_length(n) <= 32); 10345 match(Set dst (MaskAll src)); 10346 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10347 ins_encode %{ 10348 int mask_len = Matcher::vector_length(this); 10349 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10350 %} 10351 ins_pipe( pipe_slow ); 10352 %} 10353 10354 #ifdef _LP64 10355 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10356 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10357 match(Set dst (XorVMask src (MaskAll cnt))); 10358 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10359 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10360 ins_encode %{ 10361 uint masklen = Matcher::vector_length(this); 10362 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10363 %} 10364 ins_pipe( pipe_slow ); 10365 %} 10366 10367 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10368 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10369 (Matcher::vector_length(n) == 16) || 10370 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10371 match(Set dst (XorVMask src (MaskAll cnt))); 10372 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10373 ins_encode %{ 10374 uint masklen = Matcher::vector_length(this); 10375 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10376 %} 10377 ins_pipe( pipe_slow ); 10378 %} 10379 10380 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10381 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10382 match(Set dst (VectorLongToMask src)); 10383 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10384 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10385 ins_encode %{ 10386 int mask_len = Matcher::vector_length(this); 10387 int vec_enc = vector_length_encoding(mask_len); 10388 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10389 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10390 %} 10391 ins_pipe( pipe_slow ); 10392 %} 10393 10394 10395 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10396 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10397 match(Set dst (VectorLongToMask src)); 10398 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10399 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10400 ins_encode %{ 10401 int mask_len = Matcher::vector_length(this); 10402 assert(mask_len <= 32, "invalid mask length"); 10403 int vec_enc = vector_length_encoding(mask_len); 10404 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10405 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10406 %} 10407 ins_pipe( pipe_slow ); 10408 %} 10409 10410 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10411 predicate(n->bottom_type()->isa_vectmask()); 10412 match(Set dst (VectorLongToMask src)); 10413 format %{ "long_to_mask_evex $dst, $src\t!" 
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
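
// Masked ternary logic. MacroLogicV carries an 8-bit truth-table constant that
// selects a boolean function of the three vector inputs; evpternlog is expected
// to emit the corresponding VPTERNLOGD/VPTERNLOGQ with merge-masking. For
// example, a $func value of 0xE8 encodes the majority function
// (A & B) | (A & C) | (B & C) commonly produced by the macro-logic optimization
// (an illustrative value, not one required by these rules).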
instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}


instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}