//
// Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
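//
// For example, the sixteen allocator slots defined below for XMM0 together
// describe one 512-bit register: a Float occupies XMM0 alone, a Double the
// XMM0/XMM0b pair, and a full 512-bit vector spans XMM0 through XMM0p.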
//
// Linux ABI:   No register is preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);
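
// As a hedged illustration of how such a class is consumed (a sketch, not
// necessarily verbatim from this file): an AD operand constrains its
// allocation to a register class via ALLOC_IN_RC, e.g.
//
//   operand kReg() %{
//     constraint(ALLOC_IN_RC(vectmask_reg));
//     match(RegVectMask);
//     format %{ %}
//     interface(REG_INTER);
//   %}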

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre-EVEX float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for EVEX float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for EVEX double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre-EVEX 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for EVEX 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for all 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for all 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                            ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for all 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                          ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                          ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
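
// The include below provides the CPU feature predicates (such as
// VM_Version::supports_evex()) that drive the reg_class_dynamic selections
// above. Illustrative sketch (not part of the build) of what such a dynamic
// selection boils down to; the generated mask names follow the usual ADLC
// convention but are assumptions here:
#if 0
static const RegMask& vectorz_reg_mask_sketch() {
  // On AVX-512 hosts pick the 32-register EVEX mask, otherwise the
  // 16-register (8 on 32-bit) legacy mask, mirroring the predicate given
  // in reg_class_dynamic vectorz_reg above.
  return VM_Version::supports_evex() ? _VECTORZ_REG_EVEX_mask
                                     : _VECTORZ_REG_LEGACY_mask;
}
#endif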

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // Number of relocations needed by a call trampoline stub.
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5-byte instructions plus one move for the unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The deopt handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
 public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 C2_MacroAssembler _masm(&cbuf); 1314 address base = __ start_a_stub(size_exception_handler()); 1315 if (base == NULL) { 1316 ciEnv::current()->record_failure("CodeCache is full"); 1317 return 0; // CodeBuffer::expand failed 1318 } 1319 int offset = __ offset(); 1320 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1321 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1322 __ end_a_stub(); 1323 return offset; 1324 } 1325 1326 // Emit deopt handler code. 1327 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1328 1329 // Note that the code buffer's insts_mark is always relative to insts. 1330 // That's why we must use the macroassembler to generate a handler. 1331 C2_MacroAssembler _masm(&cbuf); 1332 address base = __ start_a_stub(size_deopt_handler()); 1333 if (base == NULL) { 1334 ciEnv::current()->record_failure("CodeCache is full"); 1335 return 0; // CodeBuffer::expand failed 1336 } 1337 int offset = __ offset(); 1338 1339 #ifdef _LP64 1340 address the_pc = (address) __ pc(); 1341 Label next; 1342 // push a "the_pc" on the stack without destroying any registers 1343 // as they all may be live. 
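  // (The call below pushes the address of "next" as its return address;
  // the subptr that follows subtracts the bytes emitted since the handler
  // start, rewinding the pushed value back to "the_pc".)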
1344 1345 // push address of "next" 1346 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1347 __ bind(next); 1348 // adjust it so it matches "the_pc" 1349 __ subptr(Address(rsp, 0), __ offset() - offset); 1350 #else 1351 InternalAddress here(__ pc()); 1352 __ pushptr(here.addr(), noreg); 1353 #endif 1354 1355 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1356 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1357 __ end_a_stub(); 1358 return offset; 1359 } 1360 1361 Assembler::Width widthForType(BasicType bt) { 1362 if (bt == T_BYTE) { 1363 return Assembler::B; 1364 } else if (bt == T_SHORT) { 1365 return Assembler::W; 1366 } else if (bt == T_INT) { 1367 return Assembler::D; 1368 } else { 1369 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1370 return Assembler::Q; 1371 } 1372 } 1373 1374 //============================================================================= 1375 1376 // Float masks come from different places depending on platform. 1377 #ifdef _LP64 1378 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1379 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1380 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1381 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1382 #else 1383 static address float_signmask() { return (address)float_signmask_pool; } 1384 static address float_signflip() { return (address)float_signflip_pool; } 1385 static address double_signmask() { return (address)double_signmask_pool; } 1386 static address double_signflip() { return (address)double_signflip_pool; } 1387 #endif 1388 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1389 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1390 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1391 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1392 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1393 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1394 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1395 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1396 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1397 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1398 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1399 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1400 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1401 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1402 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1403 1404 //============================================================================= 1405 bool Matcher::match_rule_supported(int opcode) { 1406 if (!has_match_rule(opcode)) { 1407 return false; // no match rule present 1408 } 1409 const bool is_LP64 = LP64_ONLY(true) 
NOT_LP64(false); 1410 switch (opcode) { 1411 case Op_AbsVL: 1412 case Op_StoreVectorScatter: 1413 if (UseAVX < 3) { 1414 return false; 1415 } 1416 break; 1417 case Op_PopCountI: 1418 case Op_PopCountL: 1419 if (!UsePopCountInstruction) { 1420 return false; 1421 } 1422 break; 1423 case Op_PopCountVI: 1424 if (UseAVX < 2) { 1425 return false; 1426 } 1427 break; 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 if (VM_Version::supports_avx() == false) { 1568 return false; 
// 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 if (UseAVX < 2) { 1573 return false; 1574 } 1575 break; 1576 case Op_FmaF: 1577 case Op_FmaD: 1578 case Op_FmaVD: 1579 case Op_FmaVF: 1580 if (!UseFMA) { 1581 return false; 1582 } 1583 break; 1584 case Op_MacroLogicV: 1585 if (UseAVX < 3 || !UseVectorMacroLogic) { 1586 return false; 1587 } 1588 break; 1589 1590 case Op_VectorCmpMasked: 1591 case Op_VectorMaskGen: 1592 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1593 return false; 1594 } 1595 break; 1596 case Op_VectorMaskFirstTrue: 1597 case Op_VectorMaskLastTrue: 1598 case Op_VectorMaskTrueCount: 1599 case Op_VectorMaskToLong: 1600 if (!is_LP64 || UseAVX < 1) { 1601 return false; 1602 } 1603 break; 1604 case Op_RoundF: 1605 case Op_RoundD: 1606 if (!is_LP64) { 1607 return false; 1608 } 1609 break; 1610 case Op_CopySignD: 1611 case Op_CopySignF: 1612 if (UseAVX < 3 || !is_LP64) { 1613 return false; 1614 } 1615 if (!VM_Version::supports_avx512vl()) { 1616 return false; 1617 } 1618 break; 1619 #ifndef _LP64 1620 case Op_AddReductionVF: 1621 case Op_AddReductionVD: 1622 case Op_MulReductionVF: 1623 case Op_MulReductionVD: 1624 if (UseSSE < 1) { // requires at least SSE 1625 return false; 1626 } 1627 break; 1628 case Op_MulAddVS2VI: 1629 case Op_RShiftVL: 1630 case Op_AbsVD: 1631 case Op_NegVD: 1632 if (UseSSE < 2) { 1633 return false; 1634 } 1635 break; 1636 #endif // !LP64 1637 case Op_CompressBits: 1638 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1639 return false; 1640 } 1641 break; 1642 case Op_ExpandBits: 1643 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1644 return false; 1645 } 1646 break; 1647 case Op_SignumF: 1648 if (UseSSE < 1) { 1649 return false; 1650 } 1651 break; 1652 case Op_SignumD: 1653 if (UseSSE < 2) { 1654 return false; 1655 } 1656 break; 1657 case Op_CompressM: 1658 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1659 return false; 1660 } 1661 break; 1662 case Op_CompressV: 1663 case Op_ExpandV: 1664 if (!VM_Version::supports_avx512vl()) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtF: 1669 if (UseSSE < 1) { 1670 return false; 1671 } 1672 break; 1673 case Op_SqrtD: 1674 #ifdef _LP64 1675 if (UseSSE < 2) { 1676 return false; 1677 } 1678 #else 1679 // x86_32.ad has a special match rule for SqrtD. 1680 // Together with common x86 rules, this handles all UseSSE cases. 1681 #endif 1682 break; 1683 case Op_ConvF2HF: 1684 case Op_ConvHF2F: 1685 if (!VM_Version::supports_float16()) { 1686 return false; 1687 } 1688 break; 1689 case Op_VectorCastF2HF: 1690 case Op_VectorCastHF2F: 1691 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1692 return false; 1693 } 1694 break; 1695 } 1696 return true; // Match rules are supported by default. 1697 } 1698 1699 //------------------------------------------------------------------------ 1700 1701 static inline bool is_pop_count_instr_target(BasicType bt) { 1702 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1703 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1704 } 1705 1706 bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { 1707 return match_rule_supported_vector(opcode, vlen, bt); 1708 } 1709 1710 // Identify extra cases that we might want to provide match rules for vector nodes and 1711 // other intrinsics guarded with vector length (vlen) and element type (bt). 
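
// Most checks below compare the total vector size in bits, derived from the
// element count and element type. A minimal sketch (not part of the build)
// of the computation used at the top of match_rule_supported_vector(), with
// a worked example; the helper name is hypothetical:
#if 0
static int vector_size_in_bits_sketch(int vlen, BasicType bt) {
  // e.g. vlen = 8, bt = T_INT: 8 elements * 4 bytes * 8 bits = 256 bits,
  // i.e. a YMM-sized vector.
  return vlen * type2aelembytes(bt) * BitsPerByte;
}
#endif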
1712 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1713 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1714 if (!match_rule_supported(opcode)) { 1715 return false; 1716 } 1717 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1718 // * SSE2 supports 128bit vectors for all types; 1719 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1720 // * AVX2 supports 256bit vectors for all types; 1721 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1722 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1723 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1724 // And MaxVectorSize is taken into account as well. 1725 if (!vector_size_supported(bt, vlen)) { 1726 return false; 1727 } 1728 // Special cases which require vector length follow: 1729 // * implementation limitations 1730 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1731 // * 128bit vroundpd instruction is present only in AVX1 1732 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1733 switch (opcode) { 1734 case Op_AbsVF: 1735 case Op_NegVF: 1736 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1737 return false; // 512bit vandps and vxorps are not available 1738 } 1739 break; 1740 case Op_AbsVD: 1741 case Op_NegVD: 1742 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1743 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1744 } 1745 break; 1746 case Op_RotateRightV: 1747 case Op_RotateLeftV: 1748 if (bt != T_INT && bt != T_LONG) { 1749 return false; 1750 } // fallthrough 1751 case Op_MacroLogicV: 1752 if (!VM_Version::supports_evex() || 1753 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1754 return false; 1755 } 1756 break; 1757 case Op_ClearArray: 1758 case Op_VectorMaskGen: 1759 case Op_VectorCmpMasked: 1760 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1761 return false; 1762 } 1763 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1764 return false; 1765 } 1766 break; 1767 case Op_LoadVectorMasked: 1768 case Op_StoreVectorMasked: 1769 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1770 return false; 1771 } 1772 break; 1773 case Op_MaxV: 1774 case Op_MinV: 1775 if (UseSSE < 4 && is_integral_type(bt)) { 1776 return false; 1777 } 1778 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1779 // Float/Double intrinsics are enabled for AVX family currently. 
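      // E.g., a MinVD/MaxVF request on an SSE-only machine (UseAVX == 0)
      // is rejected by the first check below.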
1780 if (UseAVX == 0) { 1781 return false; 1782 } 1783 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1784 return false; 1785 } 1786 } 1787 break; 1788 case Op_CallLeafVector: 1789 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1790 return false; 1791 } 1792 break; 1793 case Op_AddReductionVI: 1794 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1795 return false; 1796 } 1797 // fallthrough 1798 case Op_AndReductionV: 1799 case Op_OrReductionV: 1800 case Op_XorReductionV: 1801 if (is_subword_type(bt) && (UseSSE < 4)) { 1802 return false; 1803 } 1804 #ifndef _LP64 1805 if (bt == T_BYTE || bt == T_LONG) { 1806 return false; 1807 } 1808 #endif 1809 break; 1810 #ifndef _LP64 1811 case Op_VectorInsert: 1812 if (bt == T_LONG || bt == T_DOUBLE) { 1813 return false; 1814 } 1815 break; 1816 #endif 1817 case Op_MinReductionV: 1818 case Op_MaxReductionV: 1819 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1820 return false; 1821 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1822 return false; 1823 } 1824 // Float/Double intrinsics enabled for AVX family. 1825 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1826 return false; 1827 } 1828 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1829 return false; 1830 } 1831 #ifndef _LP64 1832 if (bt == T_BYTE || bt == T_LONG) { 1833 return false; 1834 } 1835 #endif 1836 break; 1837 case Op_VectorTest: 1838 if (UseSSE < 4) { 1839 return false; // Implementation limitation 1840 } else if (size_in_bits < 32) { 1841 return false; // Implementation limitation 1842 } 1843 break; 1844 case Op_VectorLoadShuffle: 1845 case Op_VectorRearrange: 1846 if(vlen == 2) { 1847 return false; // Implementation limitation due to how shuffle is loaded 1848 } else if (size_in_bits == 256 && UseAVX < 2) { 1849 return false; // Implementation limitation 1850 } 1851 break; 1852 case Op_VectorLoadMask: 1853 case Op_VectorMaskCast: 1854 if (size_in_bits == 256 && UseAVX < 2) { 1855 return false; // Implementation limitation 1856 } 1857 // fallthrough 1858 case Op_VectorStoreMask: 1859 if (vlen == 2) { 1860 return false; // Implementation limitation 1861 } 1862 break; 1863 case Op_PopulateIndex: 1864 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1865 return false; 1866 } 1867 break; 1868 case Op_VectorCastB2X: 1869 case Op_VectorCastS2X: 1870 case Op_VectorCastI2X: 1871 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1872 return false; 1873 } 1874 break; 1875 case Op_VectorCastL2X: 1876 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1877 return false; 1878 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1879 return false; 1880 } 1881 break; 1882 case Op_VectorCastF2X: { 1883 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1884 // happen after intermediate conversion to integer and special handling 1885 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
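      // E.g., casting a vector of 8 floats (a 256-bit source) down to bytes
      // is rejected below without AVX2, even though the byte result itself
      // occupies only 64 bits.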
1886 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1887 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1888 return false; 1889 } 1890 } 1891 // fallthrough 1892 case Op_VectorCastD2X: 1893 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1894 return false; 1895 } 1896 break; 1897 case Op_VectorCastF2HF: 1898 case Op_VectorCastHF2F: 1899 if (!VM_Version::supports_f16c() && 1900 ((!VM_Version::supports_evex() || 1901 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1902 return false; 1903 } 1904 break; 1905 case Op_RoundVD: 1906 if (!VM_Version::supports_avx512dq()) { 1907 return false; 1908 } 1909 break; 1910 case Op_MulReductionVI: 1911 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1912 return false; 1913 } 1914 break; 1915 case Op_LoadVectorGatherMasked: 1916 case Op_StoreVectorScatterMasked: 1917 case Op_StoreVectorScatter: 1918 if (is_subword_type(bt)) { 1919 return false; 1920 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1921 return false; 1922 } 1923 // fallthrough 1924 case Op_LoadVectorGather: 1925 if (size_in_bits == 64 ) { 1926 return false; 1927 } 1928 break; 1929 case Op_MaskAll: 1930 if (!VM_Version::supports_evex()) { 1931 return false; 1932 } 1933 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1934 return false; 1935 } 1936 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1937 return false; 1938 } 1939 break; 1940 case Op_VectorMaskCmp: 1941 if (vlen < 2 || size_in_bits < 32) { 1942 return false; 1943 } 1944 break; 1945 case Op_CompressM: 1946 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1947 return false; 1948 } 1949 break; 1950 case Op_CompressV: 1951 case Op_ExpandV: 1952 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1953 return false; 1954 } 1955 if (size_in_bits < 128 ) { 1956 return false; 1957 } 1958 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1959 return false; 1960 } 1961 break; 1962 case Op_VectorLongToMask: 1963 if (UseAVX < 1 || !is_LP64) { 1964 return false; 1965 } 1966 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1967 return false; 1968 } 1969 break; 1970 case Op_SignumVD: 1971 case Op_SignumVF: 1972 if (UseAVX < 1) { 1973 return false; 1974 } 1975 break; 1976 case Op_PopCountVI: 1977 case Op_PopCountVL: { 1978 if (!is_pop_count_instr_target(bt) && 1979 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1980 return false; 1981 } 1982 } 1983 break; 1984 case Op_ReverseV: 1985 case Op_ReverseBytesV: 1986 if (UseAVX < 2) { 1987 return false; 1988 } 1989 break; 1990 case Op_CountTrailingZerosV: 1991 case Op_CountLeadingZerosV: 1992 if (UseAVX < 2) { 1993 return false; 1994 } 1995 break; 1996 } 1997 return true; // Per default match rules are supported. 1998 } 1999 2000 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2001 // ADLC based match_rule_supported routine checks for the existence of pattern based 2002 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2003 // of their non-masked counterpart with mask edge being the differentiator. 2004 // This routine does a strict check on the existence of masked operation patterns 2005 // by returning a default false value for all the other opcodes apart from the 2006 // ones whose masked instruction patterns are defined in this file. 
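  // E.g., a masked 256-bit AddVB is rejected on a CPU with AVX-512F but
  // without AVX512VL by the size check below, even though the unmasked
  // 256-bit AddVB rule itself is supported.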
2007 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2008 return false; 2009 } 2010 2011 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2012 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2013 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2014 return false; 2015 } 2016 switch(opcode) { 2017 // Unary masked operations 2018 case Op_AbsVB: 2019 case Op_AbsVS: 2020 if(!VM_Version::supports_avx512bw()) { 2021 return false; // Implementation limitation 2022 } 2023 case Op_AbsVI: 2024 case Op_AbsVL: 2025 return true; 2026 2027 // Ternary masked operations 2028 case Op_FmaVF: 2029 case Op_FmaVD: 2030 return true; 2031 2032 case Op_MacroLogicV: 2033 if(bt != T_INT && bt != T_LONG) { 2034 return false; 2035 } 2036 return true; 2037 2038 // Binary masked operations 2039 case Op_AddVB: 2040 case Op_AddVS: 2041 case Op_SubVB: 2042 case Op_SubVS: 2043 case Op_MulVS: 2044 case Op_LShiftVS: 2045 case Op_RShiftVS: 2046 case Op_URShiftVS: 2047 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2048 if (!VM_Version::supports_avx512bw()) { 2049 return false; // Implementation limitation 2050 } 2051 return true; 2052 2053 case Op_MulVL: 2054 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2055 if (!VM_Version::supports_avx512dq()) { 2056 return false; // Implementation limitation 2057 } 2058 return true; 2059 2060 case Op_AndV: 2061 case Op_OrV: 2062 case Op_XorV: 2063 case Op_RotateRightV: 2064 case Op_RotateLeftV: 2065 if (bt != T_INT && bt != T_LONG) { 2066 return false; // Implementation limitation 2067 } 2068 return true; 2069 2070 case Op_VectorLoadMask: 2071 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2072 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2073 return false; 2074 } 2075 return true; 2076 2077 case Op_AddVI: 2078 case Op_AddVL: 2079 case Op_AddVF: 2080 case Op_AddVD: 2081 case Op_SubVI: 2082 case Op_SubVL: 2083 case Op_SubVF: 2084 case Op_SubVD: 2085 case Op_MulVI: 2086 case Op_MulVF: 2087 case Op_MulVD: 2088 case Op_DivVF: 2089 case Op_DivVD: 2090 case Op_SqrtVF: 2091 case Op_SqrtVD: 2092 case Op_LShiftVI: 2093 case Op_LShiftVL: 2094 case Op_RShiftVI: 2095 case Op_RShiftVL: 2096 case Op_URShiftVI: 2097 case Op_URShiftVL: 2098 case Op_LoadVectorMasked: 2099 case Op_StoreVectorMasked: 2100 case Op_LoadVectorGatherMasked: 2101 case Op_StoreVectorScatterMasked: 2102 return true; 2103 2104 case Op_MaxV: 2105 case Op_MinV: 2106 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2107 return false; // Implementation limitation 2108 } 2109 if (is_floating_point_type(bt)) { 2110 return false; // Implementation limitation 2111 } 2112 return true; 2113 2114 case Op_VectorMaskCmp: 2115 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2116 return false; // Implementation limitation 2117 } 2118 return true; 2119 2120 case Op_VectorRearrange: 2121 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2122 return false; // Implementation limitation 2123 } 2124 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2125 return false; // Implementation limitation 2126 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2127 return false; // Implementation limitation 2128 } 2129 return true; 2130 2131 // Binary Logical operations 2132 case Op_AndVMask: 2133 case Op_OrVMask: 2134 case Op_XorVMask: 2135 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2136 return false; // Implementation limitation 2137 } 2138 return true; 2139 2140 case 
Op_PopCountVI: 2141 case Op_PopCountVL: 2142 if (!is_pop_count_instr_target(bt)) { 2143 return false; 2144 } 2145 return true; 2146 2147 case Op_MaskAll: 2148 return true; 2149 2150 case Op_CountLeadingZerosV: 2151 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2152 return true; 2153 } 2154 default: 2155 return false; 2156 } 2157 } 2158 2159 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2160 return false; 2161 } 2162 2163 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2164 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2165 bool legacy = (generic_opnd->opcode() == LEGVEC); 2166 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2167 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2168 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2169 return new legVecZOper(); 2170 } 2171 if (legacy) { 2172 switch (ideal_reg) { 2173 case Op_VecS: return new legVecSOper(); 2174 case Op_VecD: return new legVecDOper(); 2175 case Op_VecX: return new legVecXOper(); 2176 case Op_VecY: return new legVecYOper(); 2177 case Op_VecZ: return new legVecZOper(); 2178 } 2179 } else { 2180 switch (ideal_reg) { 2181 case Op_VecS: return new vecSOper(); 2182 case Op_VecD: return new vecDOper(); 2183 case Op_VecX: return new vecXOper(); 2184 case Op_VecY: return new vecYOper(); 2185 case Op_VecZ: return new vecZOper(); 2186 } 2187 } 2188 ShouldNotReachHere(); 2189 return NULL; 2190 } 2191 2192 bool Matcher::is_reg2reg_move(MachNode* m) { 2193 switch (m->rule()) { 2194 case MoveVec2Leg_rule: 2195 case MoveLeg2Vec_rule: 2196 case MoveF2VL_rule: 2197 case MoveF2LEG_rule: 2198 case MoveVL2F_rule: 2199 case MoveLEG2F_rule: 2200 case MoveD2VL_rule: 2201 case MoveD2LEG_rule: 2202 case MoveVL2D_rule: 2203 case MoveLEG2D_rule: 2204 return true; 2205 default: 2206 return false; 2207 } 2208 } 2209 2210 bool Matcher::is_generic_vector(MachOper* opnd) { 2211 switch (opnd->opcode()) { 2212 case VEC: 2213 case LEGVEC: 2214 return true; 2215 default: 2216 return false; 2217 } 2218 } 2219 2220 //------------------------------------------------------------------------ 2221 2222 const RegMask* Matcher::predicate_reg_mask(void) { 2223 return &_VECTMASK_REG_mask; 2224 } 2225 2226 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2227 return new TypeVectMask(elemTy, length); 2228 } 2229 2230 // Max vector size in bytes. 0 if not supported. 2231 int Matcher::vector_width_in_bytes(BasicType bt) { 2232 assert(is_java_primitive(bt), "only primitive type vectors"); 2233 if (UseSSE < 2) return 0; 2234 // SSE2 supports 128bit vectors for all types. 2235 // AVX2 supports 256bit vectors for all types. 2236 // AVX2/EVEX supports 512bit vectors for all types. 2237 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2238 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2239 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2240 size = (UseAVX > 2) ? 64 : 32; 2241 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2242 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2243 // Use flag to limit vector size. 2244 size = MIN2(size,(int)MaxVectorSize); 2245 // Minimum 2 values in vector (or 4 for bytes). 
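  // E.g., T_INT vectors need at least 8 bytes (two ints) and T_LONG vectors
  // at least 16 bytes (two longs); anything smaller returns 0, meaning
  // "no vectors of this type".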
  switch (bt) {
    case T_DOUBLE:
    case T_LONG:
      if (size < 16) return 0;
      break;
    case T_FLOAT:
    case T_INT:
      if (size < 8) return 0;
      break;
    case T_BOOLEAN:
      if (size < 4) return 0;
      break;
    case T_CHAR:
      if (size < 4) return 0;
      break;
    case T_BYTE:
      if (size < 4) return 0;
      break;
    case T_SHORT:
      if (size < 4) return 0;
      break;
    default:
      ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // The minimum payload which can be loaded into a vector is 4 bytes:
  // four elements for byte vectors, two elements for everything wider.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Allow single-element double vectors so SVML double64 stubs can be called.
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size, max_size);
}

int Matcher::superword_max_vector_size(const BasicType bt) {
  // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake.
  if (VM_Version::is_default_intel_cascade_lake()) {
    return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
  }
  return Matcher::max_vector_size(bt);
}

int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch (size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses the ConvI2L
    // operation for an array index on LP64 if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to 'blsi r32, m32'.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
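
// Worked example: for 'b = -a[i] & a[i]' the ideal subgraph is
// (AndI (SubI ConI(0) LoadI*) LoadI*), with both LoadI* edges referring to
// the same load. A minimal sketch (not part of the build) of how the matcher
// below is driven for that shape, mirroring is_bmi_pattern() further down:
#if 0
// 'n' is the AndI node, 'm' is the shared LoadI node.
FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
// AndI is commutative (index -1); SubI must have the constant 0 as input 1.
bool is_blsi = bmii.match(Op_AndI, -1, Op_SubI, 1, 0);
#endif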
2351 template<typename ConType> 2352 class FusedPatternMatcher { 2353 Node* _op1_node; 2354 Node* _mop_node; 2355 int _con_op; 2356 2357 static int match_next(Node* n, int next_op, int next_op_idx) { 2358 if (n->in(1) == NULL || n->in(2) == NULL) { 2359 return -1; 2360 } 2361 2362 if (next_op_idx == -1) { // n is commutative, try rotations 2363 if (n->in(1)->Opcode() == next_op) { 2364 return 1; 2365 } else if (n->in(2)->Opcode() == next_op) { 2366 return 2; 2367 } 2368 } else { 2369 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2370 if (n->in(next_op_idx)->Opcode() == next_op) { 2371 return next_op_idx; 2372 } 2373 } 2374 return -1; 2375 } 2376 2377 public: 2378 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2379 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2380 2381 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2382 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2383 typename ConType::NativeType con_value) { 2384 if (_op1_node->Opcode() != op1) { 2385 return false; 2386 } 2387 if (_mop_node->outcnt() > 2) { 2388 return false; 2389 } 2390 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2391 if (op1_op2_idx == -1) { 2392 return false; 2393 } 2394 // Memory operation must be the other edge 2395 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2396 2397 // Check that the mop node is really what we want 2398 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2399 Node* op2_node = _op1_node->in(op1_op2_idx); 2400 if (op2_node->outcnt() > 1) { 2401 return false; 2402 } 2403 assert(op2_node->Opcode() == op2, "Should be"); 2404 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2405 if (op2_con_idx == -1) { 2406 return false; 2407 } 2408 // Memory operation must be the other edge 2409 int op2_mop_idx = (op2_con_idx & 1) + 1; 2410 // Check that the memory operation is the same node 2411 if (op2_node->in(op2_mop_idx) == _mop_node) { 2412 // Now check the constant 2413 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2414 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2415 return true; 2416 } 2417 } 2418 } 2419 return false; 2420 } 2421 }; 2422 2423 static bool is_bmi_pattern(Node* n, Node* m) { 2424 assert(UseBMI1Instructions, "sanity"); 2425 if (n != NULL && m != NULL) { 2426 if (m->Opcode() == Op_LoadI) { 2427 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2428 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2429 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2430 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2431 } else if (m->Opcode() == Op_LoadL) { 2432 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2433 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2434 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2435 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2436 } 2437 } 2438 return false; 2439 } 2440 2441 // Should the matcher clone input 'm' of node 'n'? 2442 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2443 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
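  // Cloning lets both users of the load consume it directly, so the matcher
  // can fold the memory operand into a single BMI instruction (e.g. blsi)
  // instead of materializing the load in a register first.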
2444 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2445 mstack.push(m, Visit); 2446 return true; 2447 } 2448 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2449 mstack.push(m, Visit); // m = ShiftCntV 2450 return true; 2451 } 2452 return false; 2453 } 2454 2455 // Should the Matcher clone shifts on addressing modes, expecting them 2456 // to be subsumed into complex addressing expressions or compute them 2457 // into registers? 2458 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2459 Node *off = m->in(AddPNode::Offset); 2460 if (off->is_Con()) { 2461 address_visited.test_set(m->_idx); // Flag as address_visited 2462 Node *adr = m->in(AddPNode::Address); 2463 2464 // Intel can handle 2 adds in addressing mode 2465 // AtomicAdd is not an addressing expression. 2466 // Cheap to find it by looking for screwy base. 2467 if (adr->is_AddP() && 2468 !adr->in(AddPNode::Base)->is_top() && 2469 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2470 // Are there other uses besides address expressions? 2471 !is_visited(adr)) { 2472 address_visited.set(adr->_idx); // Flag as address_visited 2473 Node *shift = adr->in(AddPNode::Offset); 2474 if (!clone_shift(shift, this, mstack, address_visited)) { 2475 mstack.push(shift, Pre_Visit); 2476 } 2477 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2478 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2479 } else { 2480 mstack.push(adr, Pre_Visit); 2481 } 2482 2483 // Clone X+offset as it also folds into most addressing expressions 2484 mstack.push(off, Visit); 2485 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2486 return true; 2487 } else if (clone_shift(off, this, mstack, address_visited)) { 2488 address_visited.test_set(m->_idx); // Flag as address_visited 2489 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2490 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2491 return true; 2492 } 2493 return false; 2494 } 2495 2496 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2497 switch (bt) { 2498 case BoolTest::eq: 2499 return Assembler::eq; 2500 case BoolTest::ne: 2501 return Assembler::neq; 2502 case BoolTest::le: 2503 case BoolTest::ule: 2504 return Assembler::le; 2505 case BoolTest::ge: 2506 case BoolTest::uge: 2507 return Assembler::nlt; 2508 case BoolTest::lt: 2509 case BoolTest::ult: 2510 return Assembler::lt; 2511 case BoolTest::gt: 2512 case BoolTest::ugt: 2513 return Assembler::nle; 2514 default : ShouldNotReachHere(); return Assembler::_false; 2515 } 2516 } 2517 2518 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2519 switch (bt) { 2520 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2521 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2522 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2523 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2524 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2525 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2526 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2527 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2528 } 2529 } 2530 2531 // Helper methods for MachSpillCopyNode::implementation(). 
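
// The two helpers below emit vector register-to-register moves and stack
// spills/reloads keyed by the ideal register kind: Op_VecS/VecD/VecX use
// movdl/movq/movdqu, Op_VecY uses vmovdqu, and Op_VecZ uses evmovdquq.
// When 'cbuf' is null they only format the instruction text (non-PRODUCT
// builds). On AVX-512 machines without AVX512VL, the legacy SSE/AVX moves
// cannot encode XMM16-31, so the 128/256-bit cases fall back to
// vextractf32x4/vextractf64x4 (and vinsertf32x4/vinsertf64x4 for reloads).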
2532 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2533 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2534 assert(ireg == Op_VecS || // 32bit vector 2535 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2536 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2537 "no non-adjacent vector moves" ); 2538 if (cbuf) { 2539 C2_MacroAssembler _masm(cbuf); 2540 switch (ireg) { 2541 case Op_VecS: // copy whole register 2542 case Op_VecD: 2543 case Op_VecX: 2544 #ifndef _LP64 2545 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2546 #else 2547 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2548 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2549 } else { 2550 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2551 } 2552 #endif 2553 break; 2554 case Op_VecY: 2555 #ifndef _LP64 2556 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2557 #else 2558 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2559 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2560 } else { 2561 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2562 } 2563 #endif 2564 break; 2565 case Op_VecZ: 2566 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2567 break; 2568 default: 2569 ShouldNotReachHere(); 2570 } 2571 #ifndef PRODUCT 2572 } else { 2573 switch (ireg) { 2574 case Op_VecS: 2575 case Op_VecD: 2576 case Op_VecX: 2577 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2578 break; 2579 case Op_VecY: 2580 case Op_VecZ: 2581 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2582 break; 2583 default: 2584 ShouldNotReachHere(); 2585 } 2586 #endif 2587 } 2588 } 2589 2590 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2591 int stack_offset, int reg, uint ireg, outputStream* st) { 2592 if (cbuf) { 2593 C2_MacroAssembler _masm(cbuf); 2594 if (is_load) { 2595 switch (ireg) { 2596 case Op_VecS: 2597 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2598 break; 2599 case Op_VecD: 2600 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2601 break; 2602 case Op_VecX: 2603 #ifndef _LP64 2604 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2605 #else 2606 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2607 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2608 } else { 2609 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2610 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2611 } 2612 #endif 2613 break; 2614 case Op_VecY: 2615 #ifndef _LP64 2616 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2617 #else 2618 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2619 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2620 } else { 2621 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2622 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2623 } 2624 #endif 2625 break; 2626 case Op_VecZ: 2627 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2628 break; 2629 default: 2630 ShouldNotReachHere(); 2631 } 2632 } else { // store 2633 switch (ireg) { 2634 case Op_VecS: 2635 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2636 break; 2637 case Op_VecD: 2638 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2639 break; 2640 case Op_VecX: 2641 #ifndef _LP64 2642 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2643 #else 2644 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2645 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2646 } 2647 else { 2648 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2649 } 2650 #endif 2651 break; 2652 case Op_VecY: 2653 #ifndef _LP64 2654 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2655 #else 2656 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2657 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2658 } 2659 else { 2660 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2661 } 2662 #endif 2663 break; 2664 case Op_VecZ: 2665 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2666 break; 2667 default: 2668 ShouldNotReachHere(); 2669 } 2670 } 2671 #ifndef PRODUCT 2672 } else { 2673 if (is_load) { 2674 switch (ireg) { 2675 case Op_VecS: 2676 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2677 break; 2678 case Op_VecD: 2679 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2680 break; 2681 case Op_VecX: 2682 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2683 break; 2684 case Op_VecY: 2685 case Op_VecZ: 2686 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2687 break; 2688 default: 2689 ShouldNotReachHere(); 2690 } 2691 } else { // store 2692 switch (ireg) { 2693 case Op_VecS: 2694 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2695 break; 2696 case Op_VecD: 2697 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2698 break; 2699 case Op_VecX: 2700 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2701 break; 2702 case Op_VecY: 2703 case Op_VecZ: 2704 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2705 break; 2706 default: 2707 ShouldNotReachHere(); 2708 } 2709 } 2710 #endif 2711 } 2712 } 2713 2714 template <class T> 2715 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2716 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2717 jvalue ele; 2718 switch (bt) { 2719 case T_BYTE: ele.b = con; break; 2720 case T_SHORT: ele.s = con; break; 2721 case T_INT: ele.i = con; break; 2722 case T_LONG: ele.j = con; break; 2723 case T_FLOAT: ele.f = con; break; 2724 case T_DOUBLE: ele.d = con; break; 2725 default: ShouldNotReachHere(); 2726 } 2727 for (int i = 0; i < len; i++) { 2728 val->append(ele); 2729 } 2730 return val; 2731 } 2732 2733 static inline jlong high_bit_set(BasicType bt) { 2734 switch (bt) { 2735 case T_BYTE: 
return 0x8080808080808080;
    case T_SHORT: return 0x8000800080008000;
    case T_INT:   return 0x8000000080000000;
    case T_LONG:  return 0x8000000000000000;
    default:
      ShouldNotReachHere();
      return 0;
  }
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  C2_MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}

%}

// Operands for bound floating point register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format %{ %}
  interface(REG_INTER);
%}

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
2859 operand vecD() %{ 2860 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2861 match(VecD); 2862 2863 format %{ %} 2864 interface(REG_INTER); 2865 %} 2866 2867 // Replaces legVec during post-selection cleanup. See above. 2868 operand legVecD() %{ 2869 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2870 match(VecD); 2871 2872 format %{ %} 2873 interface(REG_INTER); 2874 %} 2875 2876 // Replaces vec during post-selection cleanup. See above. 2877 operand vecX() %{ 2878 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2879 match(VecX); 2880 2881 format %{ %} 2882 interface(REG_INTER); 2883 %} 2884 2885 // Replaces legVec during post-selection cleanup. See above. 2886 operand legVecX() %{ 2887 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2888 match(VecX); 2889 2890 format %{ %} 2891 interface(REG_INTER); 2892 %} 2893 2894 // Replaces vec during post-selection cleanup. See above. 2895 operand vecY() %{ 2896 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2897 match(VecY); 2898 2899 format %{ %} 2900 interface(REG_INTER); 2901 %} 2902 2903 // Replaces legVec during post-selection cleanup. See above. 2904 operand legVecY() %{ 2905 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2906 match(VecY); 2907 2908 format %{ %} 2909 interface(REG_INTER); 2910 %} 2911 2912 // Replaces vec during post-selection cleanup. See above. 2913 operand vecZ() %{ 2914 constraint(ALLOC_IN_RC(vectorz_reg)); 2915 match(VecZ); 2916 2917 format %{ %} 2918 interface(REG_INTER); 2919 %} 2920 2921 // Replaces legVec during post-selection cleanup. See above. 2922 operand legVecZ() %{ 2923 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2924 match(VecZ); 2925 2926 format %{ %} 2927 interface(REG_INTER); 2928 %} 2929 2930 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2931 2932 // ============================================================================ 2933 2934 instruct ShouldNotReachHere() %{ 2935 match(Halt); 2936 format %{ "stop\t# ShouldNotReachHere" %} 2937 ins_encode %{ 2938 if (is_reachable()) { 2939 __ stop(_halt_reason); 2940 } 2941 %} 2942 ins_pipe(pipe_slow); 2943 %} 2944 2945 // ============================================================================ 2946 2947 instruct addF_reg(regF dst, regF src) %{ 2948 predicate((UseSSE>=1) && (UseAVX == 0)); 2949 match(Set dst (AddF dst src)); 2950 2951 format %{ "addss $dst, $src" %} 2952 ins_cost(150); 2953 ins_encode %{ 2954 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2955 %} 2956 ins_pipe(pipe_slow); 2957 %} 2958 2959 instruct addF_mem(regF dst, memory src) %{ 2960 predicate((UseSSE>=1) && (UseAVX == 0)); 2961 match(Set dst (AddF dst (LoadF src))); 2962 2963 format %{ "addss $dst, $src" %} 2964 ins_cost(150); 2965 ins_encode %{ 2966 __ addss($dst$$XMMRegister, $src$$Address); 2967 %} 2968 ins_pipe(pipe_slow); 2969 %} 2970 2971 instruct addF_imm(regF dst, immF con) %{ 2972 predicate((UseSSE>=1) && (UseAVX == 0)); 2973 match(Set dst (AddF dst con)); 2974 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2975 ins_cost(150); 2976 ins_encode %{ 2977 __ addss($dst$$XMMRegister, $constantaddress($con)); 2978 %} 2979 ins_pipe(pipe_slow); 2980 %} 2981 2982 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2983 predicate(UseAVX > 0); 2984 match(Set dst (AddF src1 src2)); 2985 2986 format %{ "vaddss $dst, $src1, $src2" %} 2987 ins_cost(150); 2988 ins_encode %{ 2989 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2990 %} 2991 ins_pipe(pipe_slow); 2992 %} 2993 2994 instruct addF_reg_mem(regF dst, 
regF src1, memory src2) %{ 2995 predicate(UseAVX > 0); 2996 match(Set dst (AddF src1 (LoadF src2))); 2997 2998 format %{ "vaddss $dst, $src1, $src2" %} 2999 ins_cost(150); 3000 ins_encode %{ 3001 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3002 %} 3003 ins_pipe(pipe_slow); 3004 %} 3005 3006 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3007 predicate(UseAVX > 0); 3008 match(Set dst (AddF src con)); 3009 3010 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3011 ins_cost(150); 3012 ins_encode %{ 3013 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3014 %} 3015 ins_pipe(pipe_slow); 3016 %} 3017 3018 instruct addD_reg(regD dst, regD src) %{ 3019 predicate((UseSSE>=2) && (UseAVX == 0)); 3020 match(Set dst (AddD dst src)); 3021 3022 format %{ "addsd $dst, $src" %} 3023 ins_cost(150); 3024 ins_encode %{ 3025 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3026 %} 3027 ins_pipe(pipe_slow); 3028 %} 3029 3030 instruct addD_mem(regD dst, memory src) %{ 3031 predicate((UseSSE>=2) && (UseAVX == 0)); 3032 match(Set dst (AddD dst (LoadD src))); 3033 3034 format %{ "addsd $dst, $src" %} 3035 ins_cost(150); 3036 ins_encode %{ 3037 __ addsd($dst$$XMMRegister, $src$$Address); 3038 %} 3039 ins_pipe(pipe_slow); 3040 %} 3041 3042 instruct addD_imm(regD dst, immD con) %{ 3043 predicate((UseSSE>=2) && (UseAVX == 0)); 3044 match(Set dst (AddD dst con)); 3045 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3054 predicate(UseAVX > 0); 3055 match(Set dst (AddD src1 src2)); 3056 3057 format %{ "vaddsd $dst, $src1, $src2" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3066 predicate(UseAVX > 0); 3067 match(Set dst (AddD src1 (LoadD src2))); 3068 3069 format %{ "vaddsd $dst, $src1, $src2" %} 3070 ins_cost(150); 3071 ins_encode %{ 3072 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3073 %} 3074 ins_pipe(pipe_slow); 3075 %} 3076 3077 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3078 predicate(UseAVX > 0); 3079 match(Set dst (AddD src con)); 3080 3081 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3082 ins_cost(150); 3083 ins_encode %{ 3084 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3085 %} 3086 ins_pipe(pipe_slow); 3087 %} 3088 3089 instruct subF_reg(regF dst, regF src) %{ 3090 predicate((UseSSE>=1) && (UseAVX == 0)); 3091 match(Set dst (SubF dst src)); 3092 3093 format %{ "subss $dst, $src" %} 3094 ins_cost(150); 3095 ins_encode %{ 3096 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3097 %} 3098 ins_pipe(pipe_slow); 3099 %} 3100 3101 instruct subF_mem(regF dst, memory src) %{ 3102 predicate((UseSSE>=1) && (UseAVX == 0)); 3103 match(Set dst (SubF dst (LoadF src))); 3104 3105 format %{ "subss $dst, $src" %} 3106 ins_cost(150); 3107 ins_encode %{ 3108 __ subss($dst$$XMMRegister, $src$$Address); 3109 %} 3110 ins_pipe(pipe_slow); 3111 %} 3112 3113 instruct subF_imm(regF dst, immF con) %{ 3114 predicate((UseSSE>=1) && (UseAVX == 0)); 3115 match(Set dst (SubF dst con)); 3116 format %{ "subss $dst, 
[$constantaddress]\t# load from constant table: float=$con" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 __ subss($dst$$XMMRegister, $constantaddress($con)); 3120 %} 3121 ins_pipe(pipe_slow); 3122 %} 3123 3124 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3125 predicate(UseAVX > 0); 3126 match(Set dst (SubF src1 src2)); 3127 3128 format %{ "vsubss $dst, $src1, $src2" %} 3129 ins_cost(150); 3130 ins_encode %{ 3131 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3137 predicate(UseAVX > 0); 3138 match(Set dst (SubF src1 (LoadF src2))); 3139 3140 format %{ "vsubss $dst, $src1, $src2" %} 3141 ins_cost(150); 3142 ins_encode %{ 3143 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3144 %} 3145 ins_pipe(pipe_slow); 3146 %} 3147 3148 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3149 predicate(UseAVX > 0); 3150 match(Set dst (SubF src con)); 3151 3152 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3153 ins_cost(150); 3154 ins_encode %{ 3155 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3156 %} 3157 ins_pipe(pipe_slow); 3158 %} 3159 3160 instruct subD_reg(regD dst, regD src) %{ 3161 predicate((UseSSE>=2) && (UseAVX == 0)); 3162 match(Set dst (SubD dst src)); 3163 3164 format %{ "subsd $dst, $src" %} 3165 ins_cost(150); 3166 ins_encode %{ 3167 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3168 %} 3169 ins_pipe(pipe_slow); 3170 %} 3171 3172 instruct subD_mem(regD dst, memory src) %{ 3173 predicate((UseSSE>=2) && (UseAVX == 0)); 3174 match(Set dst (SubD dst (LoadD src))); 3175 3176 format %{ "subsd $dst, $src" %} 3177 ins_cost(150); 3178 ins_encode %{ 3179 __ subsd($dst$$XMMRegister, $src$$Address); 3180 %} 3181 ins_pipe(pipe_slow); 3182 %} 3183 3184 instruct subD_imm(regD dst, immD con) %{ 3185 predicate((UseSSE>=2) && (UseAVX == 0)); 3186 match(Set dst (SubD dst con)); 3187 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3188 ins_cost(150); 3189 ins_encode %{ 3190 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3191 %} 3192 ins_pipe(pipe_slow); 3193 %} 3194 3195 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3196 predicate(UseAVX > 0); 3197 match(Set dst (SubD src1 src2)); 3198 3199 format %{ "vsubsd $dst, $src1, $src2" %} 3200 ins_cost(150); 3201 ins_encode %{ 3202 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3203 %} 3204 ins_pipe(pipe_slow); 3205 %} 3206 3207 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3208 predicate(UseAVX > 0); 3209 match(Set dst (SubD src1 (LoadD src2))); 3210 3211 format %{ "vsubsd $dst, $src1, $src2" %} 3212 ins_cost(150); 3213 ins_encode %{ 3214 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3215 %} 3216 ins_pipe(pipe_slow); 3217 %} 3218 3219 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3220 predicate(UseAVX > 0); 3221 match(Set dst (SubD src con)); 3222 3223 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3224 ins_cost(150); 3225 ins_encode %{ 3226 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3227 %} 3228 ins_pipe(pipe_slow); 3229 %} 3230 3231 instruct mulF_reg(regF dst, regF src) %{ 3232 predicate((UseSSE>=1) && (UseAVX == 0)); 3233 match(Set dst (MulF dst src)); 3234 3235 format %{ "mulss $dst, $src" %} 3236 ins_cost(150); 3237 
ins_encode %{ 3238 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3239 %} 3240 ins_pipe(pipe_slow); 3241 %} 3242 3243 instruct mulF_mem(regF dst, memory src) %{ 3244 predicate((UseSSE>=1) && (UseAVX == 0)); 3245 match(Set dst (MulF dst (LoadF src))); 3246 3247 format %{ "mulss $dst, $src" %} 3248 ins_cost(150); 3249 ins_encode %{ 3250 __ mulss($dst$$XMMRegister, $src$$Address); 3251 %} 3252 ins_pipe(pipe_slow); 3253 %} 3254 3255 instruct mulF_imm(regF dst, immF con) %{ 3256 predicate((UseSSE>=1) && (UseAVX == 0)); 3257 match(Set dst (MulF dst con)); 3258 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3259 ins_cost(150); 3260 ins_encode %{ 3261 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3262 %} 3263 ins_pipe(pipe_slow); 3264 %} 3265 3266 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3267 predicate(UseAVX > 0); 3268 match(Set dst (MulF src1 src2)); 3269 3270 format %{ "vmulss $dst, $src1, $src2" %} 3271 ins_cost(150); 3272 ins_encode %{ 3273 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3274 %} 3275 ins_pipe(pipe_slow); 3276 %} 3277 3278 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3279 predicate(UseAVX > 0); 3280 match(Set dst (MulF src1 (LoadF src2))); 3281 3282 format %{ "vmulss $dst, $src1, $src2" %} 3283 ins_cost(150); 3284 ins_encode %{ 3285 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3286 %} 3287 ins_pipe(pipe_slow); 3288 %} 3289 3290 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3291 predicate(UseAVX > 0); 3292 match(Set dst (MulF src con)); 3293 3294 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3295 ins_cost(150); 3296 ins_encode %{ 3297 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3298 %} 3299 ins_pipe(pipe_slow); 3300 %} 3301 3302 instruct mulD_reg(regD dst, regD src) %{ 3303 predicate((UseSSE>=2) && (UseAVX == 0)); 3304 match(Set dst (MulD dst src)); 3305 3306 format %{ "mulsd $dst, $src" %} 3307 ins_cost(150); 3308 ins_encode %{ 3309 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3310 %} 3311 ins_pipe(pipe_slow); 3312 %} 3313 3314 instruct mulD_mem(regD dst, memory src) %{ 3315 predicate((UseSSE>=2) && (UseAVX == 0)); 3316 match(Set dst (MulD dst (LoadD src))); 3317 3318 format %{ "mulsd $dst, $src" %} 3319 ins_cost(150); 3320 ins_encode %{ 3321 __ mulsd($dst$$XMMRegister, $src$$Address); 3322 %} 3323 ins_pipe(pipe_slow); 3324 %} 3325 3326 instruct mulD_imm(regD dst, immD con) %{ 3327 predicate((UseSSE>=2) && (UseAVX == 0)); 3328 match(Set dst (MulD dst con)); 3329 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3330 ins_cost(150); 3331 ins_encode %{ 3332 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3333 %} 3334 ins_pipe(pipe_slow); 3335 %} 3336 3337 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3338 predicate(UseAVX > 0); 3339 match(Set dst (MulD src1 src2)); 3340 3341 format %{ "vmulsd $dst, $src1, $src2" %} 3342 ins_cost(150); 3343 ins_encode %{ 3344 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3345 %} 3346 ins_pipe(pipe_slow); 3347 %} 3348 3349 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3350 predicate(UseAVX > 0); 3351 match(Set dst (MulD src1 (LoadD src2))); 3352 3353 format %{ "vmulsd $dst, $src1, $src2" %} 3354 ins_cost(150); 3355 ins_encode %{ 3356 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3357 %} 3358 ins_pipe(pipe_slow); 3359 %} 3360 3361 
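// Note: the UseAVX == 0 rules in this section use the destructive two-operand
// SSE encodings, which overwrite their first source operand; hence they match
// the read-modify-write shape (Set dst (MulD dst src)). The UseAVX > 0 rules
// use the non-destructive three-operand VEX encodings and so can take
// separate src1/src2 operands.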
instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3362 predicate(UseAVX > 0); 3363 match(Set dst (MulD src con)); 3364 3365 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3366 ins_cost(150); 3367 ins_encode %{ 3368 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3369 %} 3370 ins_pipe(pipe_slow); 3371 %} 3372 3373 instruct divF_reg(regF dst, regF src) %{ 3374 predicate((UseSSE>=1) && (UseAVX == 0)); 3375 match(Set dst (DivF dst src)); 3376 3377 format %{ "divss $dst, $src" %} 3378 ins_cost(150); 3379 ins_encode %{ 3380 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3381 %} 3382 ins_pipe(pipe_slow); 3383 %} 3384 3385 instruct divF_mem(regF dst, memory src) %{ 3386 predicate((UseSSE>=1) && (UseAVX == 0)); 3387 match(Set dst (DivF dst (LoadF src))); 3388 3389 format %{ "divss $dst, $src" %} 3390 ins_cost(150); 3391 ins_encode %{ 3392 __ divss($dst$$XMMRegister, $src$$Address); 3393 %} 3394 ins_pipe(pipe_slow); 3395 %} 3396 3397 instruct divF_imm(regF dst, immF con) %{ 3398 predicate((UseSSE>=1) && (UseAVX == 0)); 3399 match(Set dst (DivF dst con)); 3400 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ divss($dst$$XMMRegister, $constantaddress($con)); 3404 %} 3405 ins_pipe(pipe_slow); 3406 %} 3407 3408 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3409 predicate(UseAVX > 0); 3410 match(Set dst (DivF src1 src2)); 3411 3412 format %{ "vdivss $dst, $src1, $src2" %} 3413 ins_cost(150); 3414 ins_encode %{ 3415 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3416 %} 3417 ins_pipe(pipe_slow); 3418 %} 3419 3420 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3421 predicate(UseAVX > 0); 3422 match(Set dst (DivF src1 (LoadF src2))); 3423 3424 format %{ "vdivss $dst, $src1, $src2" %} 3425 ins_cost(150); 3426 ins_encode %{ 3427 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3428 %} 3429 ins_pipe(pipe_slow); 3430 %} 3431 3432 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3433 predicate(UseAVX > 0); 3434 match(Set dst (DivF src con)); 3435 3436 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3437 ins_cost(150); 3438 ins_encode %{ 3439 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3440 %} 3441 ins_pipe(pipe_slow); 3442 %} 3443 3444 instruct divD_reg(regD dst, regD src) %{ 3445 predicate((UseSSE>=2) && (UseAVX == 0)); 3446 match(Set dst (DivD dst src)); 3447 3448 format %{ "divsd $dst, $src" %} 3449 ins_cost(150); 3450 ins_encode %{ 3451 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3452 %} 3453 ins_pipe(pipe_slow); 3454 %} 3455 3456 instruct divD_mem(regD dst, memory src) %{ 3457 predicate((UseSSE>=2) && (UseAVX == 0)); 3458 match(Set dst (DivD dst (LoadD src))); 3459 3460 format %{ "divsd $dst, $src" %} 3461 ins_cost(150); 3462 ins_encode %{ 3463 __ divsd($dst$$XMMRegister, $src$$Address); 3464 %} 3465 ins_pipe(pipe_slow); 3466 %} 3467 3468 instruct divD_imm(regD dst, immD con) %{ 3469 predicate((UseSSE>=2) && (UseAVX == 0)); 3470 match(Set dst (DivD dst con)); 3471 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3472 ins_cost(150); 3473 ins_encode %{ 3474 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3475 %} 3476 ins_pipe(pipe_slow); 3477 %} 3478 3479 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3480 predicate(UseAVX > 0); 3481 match(Set dst (DivD 
src1 src2)); 3482 3483 format %{ "vdivsd $dst, $src1, $src2" %} 3484 ins_cost(150); 3485 ins_encode %{ 3486 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3487 %} 3488 ins_pipe(pipe_slow); 3489 %} 3490 3491 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3492 predicate(UseAVX > 0); 3493 match(Set dst (DivD src1 (LoadD src2))); 3494 3495 format %{ "vdivsd $dst, $src1, $src2" %} 3496 ins_cost(150); 3497 ins_encode %{ 3498 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3499 %} 3500 ins_pipe(pipe_slow); 3501 %} 3502 3503 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3504 predicate(UseAVX > 0); 3505 match(Set dst (DivD src con)); 3506 3507 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3508 ins_cost(150); 3509 ins_encode %{ 3510 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3511 %} 3512 ins_pipe(pipe_slow); 3513 %} 3514 3515 instruct absF_reg(regF dst) %{ 3516 predicate((UseSSE>=1) && (UseAVX == 0)); 3517 match(Set dst (AbsF dst)); 3518 ins_cost(150); 3519 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3520 ins_encode %{ 3521 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3522 %} 3523 ins_pipe(pipe_slow); 3524 %} 3525 3526 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3527 predicate(UseAVX > 0); 3528 match(Set dst (AbsF src)); 3529 ins_cost(150); 3530 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3531 ins_encode %{ 3532 int vlen_enc = Assembler::AVX_128bit; 3533 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3534 ExternalAddress(float_signmask()), vlen_enc); 3535 %} 3536 ins_pipe(pipe_slow); 3537 %} 3538 3539 instruct absD_reg(regD dst) %{ 3540 predicate((UseSSE>=2) && (UseAVX == 0)); 3541 match(Set dst (AbsD dst)); 3542 ins_cost(150); 3543 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3544 "# abs double by sign masking" %} 3545 ins_encode %{ 3546 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3547 %} 3548 ins_pipe(pipe_slow); 3549 %} 3550 3551 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3552 predicate(UseAVX > 0); 3553 match(Set dst (AbsD src)); 3554 ins_cost(150); 3555 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3556 "# abs double by sign masking" %} 3557 ins_encode %{ 3558 int vlen_enc = Assembler::AVX_128bit; 3559 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3560 ExternalAddress(double_signmask()), vlen_enc); 3561 %} 3562 ins_pipe(pipe_slow); 3563 %} 3564 3565 instruct negF_reg(regF dst) %{ 3566 predicate((UseSSE>=1) && (UseAVX == 0)); 3567 match(Set dst (NegF dst)); 3568 ins_cost(150); 3569 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3570 ins_encode %{ 3571 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3572 %} 3573 ins_pipe(pipe_slow); 3574 %} 3575 3576 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3577 predicate(UseAVX > 0); 3578 match(Set dst (NegF src)); 3579 ins_cost(150); 3580 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3581 ins_encode %{ 3582 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3583 ExternalAddress(float_signflip())); 3584 %} 3585 ins_pipe(pipe_slow); 3586 %} 3587 3588 instruct negD_reg(regD dst) %{ 3589 predicate((UseSSE>=2) && (UseAVX == 0)); 3590 match(Set dst (NegD dst)); 3591 ins_cost(150); 3592 format %{ "xorpd $dst, [0x8000000000000000]\t" 3593 "# neg double by sign flipping" %} 3594 ins_encode %{ 3595 __ xorpd($dst$$XMMRegister, 
ExternalAddress(double_signflip()));
3596 %}
3597 ins_pipe(pipe_slow);
3598 %}
3599
3600 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3601 predicate(UseAVX > 0);
3602 match(Set dst (NegD src));
3603 ins_cost(150);
3604 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3605 "# neg double by sign flipping" %}
3606 ins_encode %{
3607 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3608 ExternalAddress(double_signflip()));
3609 %}
3610 ins_pipe(pipe_slow);
3611 %}
3612
3613 // The sqrtss instruction needs its destination register to be pre-initialized for best performance.
3614 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
3615 instruct sqrtF_reg(regF dst) %{
3616 predicate(UseSSE>=1);
3617 match(Set dst (SqrtF dst));
3618 format %{ "sqrtss $dst, $dst" %}
3619 ins_encode %{
3620 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3621 %}
3622 ins_pipe(pipe_slow);
3623 %}
3624
3625 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
3626 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
3627 instruct sqrtD_reg(regD dst) %{
3628 predicate(UseSSE>=2);
3629 match(Set dst (SqrtD dst));
3630 format %{ "sqrtsd $dst, $dst" %}
3631 ins_encode %{
3632 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3633 %}
3634 ins_pipe(pipe_slow);
3635 %}
3636
3637 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
3638 effect(TEMP tmp);
3639 match(Set dst (ConvF2HF src));
3640 ins_cost(125);
3641 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
3642 ins_encode %{
3643 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
3644 %}
3645 ins_pipe( pipe_slow );
3646 %}
3647
3648 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3649 predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3650 effect(TEMP ktmp, TEMP rtmp);
3651 match(Set mem (StoreC mem (ConvF2HF src)));
3652 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3653 ins_encode %{
3654 __ movl($rtmp$$Register, 0x1);
3655 __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3656 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3657 %}
3658 ins_pipe( pipe_slow );
3659 %}
3660
3661 instruct vconvF2HF(vec dst, vec src) %{
3662 match(Set dst (VectorCastF2HF src));
3663 format %{ "vector_conv_F2HF $dst $src" %}
3664 ins_encode %{
3665 int vlen_enc = vector_length_encoding(this, $src);
3666 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3667 %}
3668 ins_pipe( pipe_slow );
3669 %}
3670
3671 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3672 match(Set mem (StoreVector mem (VectorCastF2HF src)));
3673 format %{ "vcvtps2ph $mem,$src" %}
3674 ins_encode %{
3675 int vlen_enc = vector_length_encoding(this, $src);
3676 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3677 %}
3678 ins_pipe( pipe_slow );
3679 %}
3680
3681 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
3682 match(Set dst (ConvHF2F src));
3683 format %{ "vcvtph2ps $dst,$src" %}
3684 ins_encode %{
3685 __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
3686 %}
3687 ins_pipe( pipe_slow );
3688 %}
3689
3690 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3691 match(Set dst (VectorCastHF2F (LoadVector mem)));
3692 format %{ "vcvtph2ps $dst,$mem" %}
3693 ins_encode %{
3694 int vlen_enc = vector_length_encoding(this);
3695 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3696 %}
3697 ins_pipe(
pipe_slow ); 3698 %} 3699 3700 instruct vconvHF2F(vec dst, vec src) %{ 3701 match(Set dst (VectorCastHF2F src)); 3702 ins_cost(125); 3703 format %{ "vector_conv_HF2F $dst,$src" %} 3704 ins_encode %{ 3705 int vlen_enc = vector_length_encoding(this); 3706 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3707 %} 3708 ins_pipe( pipe_slow ); 3709 %} 3710 3711 // ---------------------------------------- VectorReinterpret ------------------------------------ 3712 instruct reinterpret_mask(kReg dst) %{ 3713 predicate(n->bottom_type()->isa_vectmask() && 3714 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3715 match(Set dst (VectorReinterpret dst)); 3716 ins_cost(125); 3717 format %{ "vector_reinterpret $dst\t!" %} 3718 ins_encode %{ 3719 // empty 3720 %} 3721 ins_pipe( pipe_slow ); 3722 %} 3723 3724 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3725 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3726 n->bottom_type()->isa_vectmask() && 3727 n->in(1)->bottom_type()->isa_vectmask() && 3728 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3729 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3730 match(Set dst (VectorReinterpret src)); 3731 effect(TEMP xtmp); 3732 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3733 ins_encode %{ 3734 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3735 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3736 assert(src_sz == dst_sz , "src and dst size mismatch"); 3737 int vlen_enc = vector_length_encoding(src_sz); 3738 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3739 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3740 %} 3741 ins_pipe( pipe_slow ); 3742 %} 3743 3744 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3745 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3746 n->bottom_type()->isa_vectmask() && 3747 n->in(1)->bottom_type()->isa_vectmask() && 3748 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3749 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3750 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3751 match(Set dst (VectorReinterpret src)); 3752 effect(TEMP xtmp); 3753 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" 
%} 3754 ins_encode %{ 3755 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3756 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3757 assert(src_sz == dst_sz , "src and dst size mismatch"); 3758 int vlen_enc = vector_length_encoding(src_sz); 3759 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3760 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3766 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3767 n->bottom_type()->isa_vectmask() && 3768 n->in(1)->bottom_type()->isa_vectmask() && 3769 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3770 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3771 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3772 match(Set dst (VectorReinterpret src)); 3773 effect(TEMP xtmp); 3774 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3775 ins_encode %{ 3776 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3777 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3778 assert(src_sz == dst_sz , "src and dst size mismatch"); 3779 int vlen_enc = vector_length_encoding(src_sz); 3780 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3781 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3782 %} 3783 ins_pipe( pipe_slow ); 3784 %} 3785 3786 instruct reinterpret(vec dst) %{ 3787 predicate(!n->bottom_type()->isa_vectmask() && 3788 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3789 match(Set dst (VectorReinterpret dst)); 3790 ins_cost(125); 3791 format %{ "vector_reinterpret $dst\t!" 
%} 3792 ins_encode %{ 3793 // empty 3794 %} 3795 ins_pipe( pipe_slow ); 3796 %} 3797 3798 instruct reinterpret_expand(vec dst, vec src) %{ 3799 predicate(UseAVX == 0 && 3800 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3801 match(Set dst (VectorReinterpret src)); 3802 ins_cost(125); 3803 effect(TEMP dst); 3804 format %{ "vector_reinterpret_expand $dst,$src" %} 3805 ins_encode %{ 3806 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3807 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3808 3809 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3810 if (src_vlen_in_bytes == 4) { 3811 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3812 } else { 3813 assert(src_vlen_in_bytes == 8, ""); 3814 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3815 } 3816 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3817 %} 3818 ins_pipe( pipe_slow ); 3819 %} 3820 3821 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3822 predicate(UseAVX > 0 && 3823 !n->bottom_type()->isa_vectmask() && 3824 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3825 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3826 match(Set dst (VectorReinterpret src)); 3827 ins_cost(125); 3828 format %{ "vector_reinterpret_expand $dst,$src" %} 3829 ins_encode %{ 3830 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3831 %} 3832 ins_pipe( pipe_slow ); 3833 %} 3834 3835 3836 instruct vreinterpret_expand(legVec dst, vec src) %{ 3837 predicate(UseAVX > 0 && 3838 !n->bottom_type()->isa_vectmask() && 3839 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3840 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3841 match(Set dst (VectorReinterpret src)); 3842 ins_cost(125); 3843 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3844 ins_encode %{ 3845 switch (Matcher::vector_length_in_bytes(this, $src)) { 3846 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3847 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3848 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3849 default: ShouldNotReachHere(); 3850 } 3851 %} 3852 ins_pipe( pipe_slow ); 3853 %} 3854 3855 instruct reinterpret_shrink(vec dst, legVec src) %{ 3856 predicate(!n->bottom_type()->isa_vectmask() && 3857 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3858 match(Set dst (VectorReinterpret src)); 3859 ins_cost(125); 3860 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3861 ins_encode %{ 3862 switch (Matcher::vector_length_in_bytes(this)) { 3863 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3864 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3865 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3866 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3867 default: ShouldNotReachHere(); 3868 } 3869 %} 3870 ins_pipe( pipe_slow ); 3871 %} 3872 3873 // ---------------------------------------------------------------------------------------------------- 3874 3875 #ifdef _LP64 3876 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3877 match(Set dst (RoundDoubleMode src rmode)); 3878 format %{ "roundsd $dst,$src" %} 3879 ins_cost(150); 3880 ins_encode %{ 3881 assert(UseSSE >= 4, "required"); 3882 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3883 %} 3884 ins_pipe(pipe_slow); 3885 %} 3886 3887 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3888 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3889 format %{ "roundsd $dst,$src" %} 3890 ins_cost(150); 3891 ins_encode %{ 3892 assert(UseSSE >= 4, "required"); 3893 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3894 %} 3895 ins_pipe(pipe_slow); 3896 %} 3897 3898 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3899 match(Set dst (RoundDoubleMode con rmode)); 3900 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3901 ins_cost(150); 3902 ins_encode %{ 3903 assert(UseSSE >= 4, "required"); 3904 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3905 %} 3906 ins_pipe(pipe_slow); 3907 %} 3908 3909 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3910 predicate(Matcher::vector_length(n) < 8); 3911 match(Set dst (RoundDoubleModeV src rmode)); 3912 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3913 ins_encode %{ 3914 assert(UseAVX > 0, "required"); 3915 int vlen_enc = vector_length_encoding(this); 3916 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3917 %} 3918 ins_pipe( pipe_slow ); 3919 %} 3920 3921 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3922 predicate(Matcher::vector_length(n) == 8); 3923 match(Set dst (RoundDoubleModeV src rmode)); 3924 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3925 ins_encode %{ 3926 assert(UseAVX > 2, "required"); 3927 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3928 %} 3929 ins_pipe( pipe_slow ); 3930 %} 3931 3932 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3933 predicate(Matcher::vector_length(n) < 8); 3934 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3935 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3936 ins_encode %{ 3937 assert(UseAVX > 0, "required"); 3938 int vlen_enc = vector_length_encoding(this); 3939 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3940 %} 3941 ins_pipe( pipe_slow ); 3942 %} 3943 3944 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3945 predicate(Matcher::vector_length(n) == 8); 3946 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3947 format %{ "vrndscalepd $dst,$mem,$rmode\t! 
round packed8D" %} 3948 ins_encode %{ 3949 assert(UseAVX > 2, "required"); 3950 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3951 %} 3952 ins_pipe( pipe_slow ); 3953 %} 3954 #endif // _LP64 3955 3956 instruct onspinwait() %{ 3957 match(OnSpinWait); 3958 ins_cost(200); 3959 3960 format %{ 3961 $$template 3962 $$emit$$"pause\t! membar_onspinwait" 3963 %} 3964 ins_encode %{ 3965 __ pause(); 3966 %} 3967 ins_pipe(pipe_slow); 3968 %} 3969 3970 // a * b + c 3971 instruct fmaD_reg(regD a, regD b, regD c) %{ 3972 match(Set c (FmaD c (Binary a b))); 3973 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3974 ins_cost(150); 3975 ins_encode %{ 3976 assert(UseFMA, "Needs FMA instructions support."); 3977 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3978 %} 3979 ins_pipe( pipe_slow ); 3980 %} 3981 3982 // a * b + c 3983 instruct fmaF_reg(regF a, regF b, regF c) %{ 3984 match(Set c (FmaF c (Binary a b))); 3985 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3986 ins_cost(150); 3987 ins_encode %{ 3988 assert(UseFMA, "Needs FMA instructions support."); 3989 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 3994 // ====================VECTOR INSTRUCTIONS===================================== 3995 3996 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3997 instruct MoveVec2Leg(legVec dst, vec src) %{ 3998 match(Set dst src); 3999 format %{ "" %} 4000 ins_encode %{ 4001 ShouldNotReachHere(); 4002 %} 4003 ins_pipe( fpu_reg_reg ); 4004 %} 4005 4006 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4007 match(Set dst src); 4008 format %{ "" %} 4009 ins_encode %{ 4010 ShouldNotReachHere(); 4011 %} 4012 ins_pipe( fpu_reg_reg ); 4013 %} 4014 4015 // ============================================================================ 4016 4017 // Load vectors generic operand pattern 4018 instruct loadV(vec dst, memory mem) %{ 4019 match(Set dst (LoadVector mem)); 4020 ins_cost(125); 4021 format %{ "load_vector $dst,$mem" %} 4022 ins_encode %{ 4023 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4024 %} 4025 ins_pipe( pipe_slow ); 4026 %} 4027 4028 // Store vectors generic operand pattern. 4029 instruct storeV(memory mem, vec src) %{ 4030 match(Set mem (StoreVector mem src)); 4031 ins_cost(145); 4032 format %{ "store_vector $mem,$src\n\t" %} 4033 ins_encode %{ 4034 switch (Matcher::vector_length_in_bytes(this, $src)) { 4035 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4036 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4037 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4038 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4039 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4040 default: ShouldNotReachHere(); 4041 } 4042 %} 4043 ins_pipe( pipe_slow ); 4044 %} 4045 4046 // ---------------------------------------- Gather ------------------------------------ 4047 4048 // Gather INT, LONG, FLOAT, DOUBLE 4049 4050 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4051 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 4052 match(Set dst (LoadVectorGather mem idx)); 4053 effect(TEMP dst, TEMP tmp, TEMP mask); 4054 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
4055 ins_encode %{
4056 assert(UseAVX >= 2, "sanity");
4057
4058 int vlen_enc = vector_length_encoding(this);
4059 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4060
4061 assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
4062 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4063
4064 if (vlen_enc == Assembler::AVX_128bit) {
4065 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
4066 } else {
4067 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
4068 }
4069 __ lea($tmp$$Register, $mem$$Address);
4070 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4071 %}
4072 ins_pipe( pipe_slow );
4073 %}
4074
4075 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
4076 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
4077 match(Set dst (LoadVectorGather mem idx));
4078 effect(TEMP dst, TEMP tmp, TEMP ktmp);
4079 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
4080 ins_encode %{
4081 assert(UseAVX > 2, "sanity");
4082
4083 int vlen_enc = vector_length_encoding(this);
4084 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4085
4086 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4087
4088 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
4089 __ lea($tmp$$Register, $mem$$Address);
4090 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4091 %}
4092 ins_pipe( pipe_slow );
4093 %}
4094
4095 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4096 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
4097 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
4098 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
4099 ins_encode %{
4100 assert(UseAVX > 2, "sanity");
4101 int vlen_enc = vector_length_encoding(this);
4102 BasicType elem_bt = Matcher::vector_element_basic_type(this);
4103 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4104 // Note: Since the gather instruction partially updates the opmask register used
4105 // for predication, the mask operand is first copied to a temporary.
4106 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4107 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4108 __ lea($tmp$$Register, $mem$$Address);
4109 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4110 %}
4111 ins_pipe( pipe_slow );
4112 %}
4113 // ====================Scatter=======================================
4114
4115 // Scatter INT, LONG, FLOAT, DOUBLE
4116
4117 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
4118 predicate(UseAVX > 2);
4119 match(Set mem (StoreVectorScatter mem (Binary src idx)));
4120 effect(TEMP tmp, TEMP ktmp);
4121 format %{ "store_vector_scatter $mem, $idx, $src\t!
using $ktmp and $tmp as TEMP" %}
4122 ins_encode %{
4123 int vlen_enc = vector_length_encoding(this, $src);
4124 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4125
4126 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4127 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4128
4129 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
4130 __ lea($tmp$$Register, $mem$$Address);
4131 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4132 %}
4133 ins_pipe( pipe_slow );
4134 %}
4135
4136 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4137 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
4138 effect(TEMP tmp, TEMP ktmp);
4139 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
4140 ins_encode %{
4141 int vlen_enc = vector_length_encoding(this, $src);
4142 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
4143 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
4144 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4145 // Note: Since the scatter instruction partially updates the opmask register used
4146 // for predication, the mask operand is first copied to a temporary.
4147 __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
4148 __ lea($tmp$$Register, $mem$$Address);
4149 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
4150 %}
4151 ins_pipe( pipe_slow );
4152 %}
4153
4154 // ====================REPLICATE=======================================
4155
4156 // Replicate byte scalar to be vector
4157 instruct vReplB_reg(vec dst, rRegI src) %{
4158 predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
4159 match(Set dst (Replicate src));
4160 format %{ "replicateB $dst,$src" %}
4161 ins_encode %{
4162 uint vlen = Matcher::vector_length(this);
4163 if (UseAVX >= 2) {
4164 int vlen_enc = vector_length_encoding(this);
4165 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL+BW for <512-bit operands
4166 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
4167 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
4168 } else {
4169 __ movdl($dst$$XMMRegister, $src$$Register);
4170 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4171 }
4172 } else {
4173 assert(UseAVX < 2, "");
4174 __ movdl($dst$$XMMRegister, $src$$Register);
4175 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
4176 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
4177 if (vlen >= 16) {
4178 assert(vlen == 16, "");
4179 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
4180 }
4181 }
4182 %}
4183 ins_pipe( pipe_slow );
4184 %}
4185
4186 instruct ReplB_mem(vec dst, memory mem) %{
4187 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
4188 match(Set dst (Replicate (LoadB mem)));
4189 format %{ "replicateB $dst,$mem" %}
4190 ins_encode %{
4191 int vlen_enc = vector_length_encoding(this);
4192 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
4193 %}
4194 ins_pipe( pipe_slow );
4195 %}
4196
4197 // ====================ReplicateS=======================================
4198
4199 instruct vReplS_reg(vec dst, rRegI src) %{
4200 predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
4201 match(Set dst (Replicate src));
4202 format %{ "replicateS $dst,$src" %}
4203
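// Broadcast strategy used below: a direct GPR-to-vector broadcast
// (evpbroadcastw) requires AVX512BW, with AVX512VL for sub-512-bit vectors;
// otherwise the scalar is first moved into an XMM register and then widened
// with vpbroadcastw (AVX2) or pshuflw/punpcklqdq (SSE2).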
ins_encode %{ 4204 uint vlen = Matcher::vector_length(this); 4205 int vlen_enc = vector_length_encoding(this); 4206 if (UseAVX >= 2) { 4207 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4208 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4209 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4210 } else { 4211 __ movdl($dst$$XMMRegister, $src$$Register); 4212 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4213 } 4214 } else { 4215 assert(UseAVX < 2, ""); 4216 __ movdl($dst$$XMMRegister, $src$$Register); 4217 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4218 if (vlen >= 8) { 4219 assert(vlen == 8, ""); 4220 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4221 } 4222 } 4223 %} 4224 ins_pipe( pipe_slow ); 4225 %} 4226 4227 instruct ReplS_mem(vec dst, memory mem) %{ 4228 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4229 match(Set dst (Replicate (LoadS mem))); 4230 format %{ "replicateS $dst,$mem" %} 4231 ins_encode %{ 4232 int vlen_enc = vector_length_encoding(this); 4233 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4234 %} 4235 ins_pipe( pipe_slow ); 4236 %} 4237 4238 // ====================ReplicateI======================================= 4239 4240 instruct ReplI_reg(vec dst, rRegI src) %{ 4241 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4242 match(Set dst (Replicate src)); 4243 format %{ "replicateI $dst,$src" %} 4244 ins_encode %{ 4245 uint vlen = Matcher::vector_length(this); 4246 int vlen_enc = vector_length_encoding(this); 4247 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4248 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4249 } else if (VM_Version::supports_avx2()) { 4250 __ movdl($dst$$XMMRegister, $src$$Register); 4251 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4252 } else { 4253 __ movdl($dst$$XMMRegister, $src$$Register); 4254 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4255 } 4256 %} 4257 ins_pipe( pipe_slow ); 4258 %} 4259 4260 instruct ReplI_mem(vec dst, memory mem) %{ 4261 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4262 match(Set dst (Replicate (LoadI mem))); 4263 format %{ "replicateI $dst,$mem" %} 4264 ins_encode %{ 4265 int vlen_enc = vector_length_encoding(this); 4266 if (VM_Version::supports_avx2()) { 4267 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4268 } else if (VM_Version::supports_avx()) { 4269 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4270 } else { 4271 __ movdl($dst$$XMMRegister, $mem$$Address); 4272 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4273 } 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 instruct ReplI_imm(vec dst, immI con) %{ 4279 predicate(Matcher::is_non_long_integral_vector(n)); 4280 match(Set dst (Replicate con)); 4281 format %{ "replicateI $dst,$con" %} 4282 ins_encode %{ 4283 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4284 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4285 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4286 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4287 BasicType bt = Matcher::vector_element_basic_type(this); 4288 int vlen = Matcher::vector_length_in_bytes(this); 4289 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4290 %} 4291 ins_pipe( pipe_slow ); 4292 %} 4293 4294 // Replicate scalar zero to be vector 4295 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4296 predicate(Matcher::is_non_long_integral_vector(n)); 4297 match(Set dst (Replicate zero)); 4298 format %{ "replicateI $dst,$zero" %} 4299 ins_encode %{ 4300 int vlen_enc = vector_length_encoding(this); 4301 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4302 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4303 } else { 4304 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4305 } 4306 %} 4307 ins_pipe( fpu_reg_reg ); 4308 %} 4309 4310 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4311 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4312 match(Set dst (Replicate con)); 4313 format %{ "vallones $dst" %} 4314 ins_encode %{ 4315 int vector_len = vector_length_encoding(this); 4316 __ vallones($dst$$XMMRegister, vector_len); 4317 %} 4318 ins_pipe( pipe_slow ); 4319 %} 4320 4321 // ====================ReplicateL======================================= 4322 4323 #ifdef _LP64 4324 // Replicate long (8 byte) scalar to be vector 4325 instruct ReplL_reg(vec dst, rRegL src) %{ 4326 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4327 match(Set dst (Replicate src)); 4328 format %{ "replicateL $dst,$src" %} 4329 ins_encode %{ 4330 int vlen = Matcher::vector_length(this); 4331 int vlen_enc = vector_length_encoding(this); 4332 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4333 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4334 } else if (VM_Version::supports_avx2()) { 4335 __ movdq($dst$$XMMRegister, $src$$Register); 4336 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4337 } else { 4338 __ movdq($dst$$XMMRegister, $src$$Register); 4339 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4340 } 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 #else // _LP64 4345 // Replicate long (8 byte) scalar to be vector 4346 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4347 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4348 match(Set dst (Replicate src)); 4349 effect(TEMP dst, USE src, TEMP tmp); 4350 format %{ "replicateL $dst,$src" %} 4351 ins_encode %{ 4352 uint vlen = Matcher::vector_length(this); 4353 if (vlen == 2) { 4354 __ movdl($dst$$XMMRegister, $src$$Register); 4355 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4356 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4357 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4358 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4359 int vlen_enc = Assembler::AVX_256bit; 4360 __ movdl($dst$$XMMRegister, $src$$Register); 4361 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4362 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4363 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4364 } else { 4365 __ movdl($dst$$XMMRegister, $src$$Register); 4366 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4367 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4368 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4369 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4370 
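// (The fallback sequence above assembles the 64-bit value from its two 32-bit
// halves, duplicates it across the low 128 bits, and then mirrors the result
// into the upper 128-bit lane.)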
} 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4376 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4377 match(Set dst (Replicate src)); 4378 effect(TEMP dst, USE src, TEMP tmp); 4379 format %{ "replicateL $dst,$src" %} 4380 ins_encode %{ 4381 if (VM_Version::supports_avx512vl()) { 4382 __ movdl($dst$$XMMRegister, $src$$Register); 4383 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4384 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4386 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4387 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4388 } else { 4389 int vlen_enc = Assembler::AVX_512bit; 4390 __ movdl($dst$$XMMRegister, $src$$Register); 4391 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4392 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4393 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4394 } 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 #endif // _LP64 4399 4400 instruct ReplL_mem(vec dst, memory mem) %{ 4401 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4402 match(Set dst (Replicate (LoadL mem))); 4403 format %{ "replicateL $dst,$mem" %} 4404 ins_encode %{ 4405 int vlen_enc = vector_length_encoding(this); 4406 if (VM_Version::supports_avx2()) { 4407 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4408 } else if (VM_Version::supports_sse3()) { 4409 __ movddup($dst$$XMMRegister, $mem$$Address); 4410 } else { 4411 __ movq($dst$$XMMRegister, $mem$$Address); 4412 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4413 } 4414 %} 4415 ins_pipe( pipe_slow ); 4416 %} 4417 4418 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
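// A single 8-byte copy of the immediate is emitted into the constant table
// (via vreplicate_imm above, called with len == 1); load_constant_vector is
// then expected to widen it to the full vector length.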
4419 instruct ReplL_imm(vec dst, immL con) %{ 4420 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4421 match(Set dst (Replicate con)); 4422 format %{ "replicateL $dst,$con" %} 4423 ins_encode %{ 4424 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4425 int vlen = Matcher::vector_length_in_bytes(this); 4426 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4427 %} 4428 ins_pipe( pipe_slow ); 4429 %} 4430 4431 instruct ReplL_zero(vec dst, immL0 zero) %{ 4432 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4433 match(Set dst (Replicate zero)); 4434 format %{ "replicateL $dst,$zero" %} 4435 ins_encode %{ 4436 int vlen_enc = vector_length_encoding(this); 4437 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4438 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4439 } else { 4440 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4441 } 4442 %} 4443 ins_pipe( fpu_reg_reg ); 4444 %} 4445 4446 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4447 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4448 match(Set dst (Replicate con)); 4449 format %{ "vallones $dst" %} 4450 ins_encode %{ 4451 int vector_len = vector_length_encoding(this); 4452 __ vallones($dst$$XMMRegister, vector_len); 4453 %} 4454 ins_pipe( pipe_slow ); 4455 %} 4456 4457 // ====================ReplicateF======================================= 4458 4459 instruct vReplF_reg(vec dst, vlRegF src) %{ 4460 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4461 match(Set dst (Replicate src)); 4462 format %{ "replicateF $dst,$src" %} 4463 ins_encode %{ 4464 uint vlen = Matcher::vector_length(this); 4465 int vlen_enc = vector_length_encoding(this); 4466 if (vlen <= 4) { 4467 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4468 } else if (VM_Version::supports_avx2()) { 4469 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4470 } else { 4471 assert(vlen == 8, "sanity"); 4472 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4473 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4474 } 4475 %} 4476 ins_pipe( pipe_slow ); 4477 %} 4478 4479 instruct ReplF_reg(vec dst, vlRegF src) %{ 4480 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4481 match(Set dst (Replicate src)); 4482 format %{ "replicateF $dst,$src" %} 4483 ins_encode %{ 4484 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4485 %} 4486 ins_pipe( pipe_slow ); 4487 %} 4488 4489 instruct ReplF_mem(vec dst, memory mem) %{ 4490 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4491 match(Set dst (Replicate (LoadF mem))); 4492 format %{ "replicateF $dst,$mem" %} 4493 ins_encode %{ 4494 int vlen_enc = vector_length_encoding(this); 4495 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4496 %} 4497 ins_pipe( pipe_slow ); 4498 %} 4499 4500 // Replicate float scalar immediate to be vector by loading from const table. 4501 instruct ReplF_imm(vec dst, immF con) %{ 4502 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4503 match(Set dst (Replicate con)); 4504 format %{ "replicateF $dst,$con" %} 4505 ins_encode %{ 4506 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4507 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant,
                                            VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate a double (8-byte) scalar into a vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
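// When only SSE2 is available, the low double is duplicated with pshufd 0x44:
// the dword selector 1,0,1,0 copies dwords 1:0 into both qword halves of the
// destination.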
instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate a double (8-byte) scalar immediate into a vector by loading it from the constant table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
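// For vectors wider than 128 bits the element index is split in two below:
// x_idx is the position within one 128-bit lane and y_idx is the lane number.
// The target lane is extracted, the scalar is inserted into it, and the lane
// is written back into the destination.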
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}
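// insertps/vinsertps encode the destination dword in imm8[5:4] (imm8[7:6]
// selects the source element, imm8[3:0] is a zero mask), hence x_idx << 4.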
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}
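// There is no direct XMM-to-XMM double insert: the value is first moved to a
// temporary GPR with movq and then inserted like a long via pinsrq/vpinsrq.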
#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================
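// The FP add/mul reductions keep the running value in $dst and combine the
// elements strictly in order: unlike the integer cases above, floating-point
// add and mul must not be reassociated here, so no shortcut is taken.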
as TEMP" %} 4899 ins_encode %{ 4900 int opcode = this->ideal_Opcode(); 4901 int vlen = Matcher::vector_length(this, $src); 4902 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4903 %} 4904 ins_pipe( pipe_slow ); 4905 %} 4906 4907 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4908 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4909 match(Set dst (AddReductionVF dst src)); 4910 match(Set dst (MulReductionVF dst src)); 4911 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4912 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4913 ins_encode %{ 4914 int opcode = this->ideal_Opcode(); 4915 int vlen = Matcher::vector_length(this, $src); 4916 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4917 %} 4918 ins_pipe( pipe_slow ); 4919 %} 4920 4921 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4922 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4923 match(Set dst (AddReductionVF dst src)); 4924 match(Set dst (MulReductionVF dst src)); 4925 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4926 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4927 ins_encode %{ 4928 int opcode = this->ideal_Opcode(); 4929 int vlen = Matcher::vector_length(this, $src); 4930 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4931 %} 4932 ins_pipe( pipe_slow ); 4933 %} 4934 4935 // =======================Double Reduction========================================== 4936 4937 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4938 predicate(Matcher::vector_length(n->in(2)) == 2); // src 4939 match(Set dst (AddReductionVD dst src)); 4940 match(Set dst (MulReductionVD dst src)); 4941 effect(TEMP dst, TEMP vtmp); 4942 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4943 ins_encode %{ 4944 int opcode = this->ideal_Opcode(); 4945 int vlen = Matcher::vector_length(this, $src); 4946 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4947 %} 4948 ins_pipe( pipe_slow ); 4949 %} 4950 4951 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4952 predicate(Matcher::vector_length(n->in(2)) == 4); // src 4953 match(Set dst (AddReductionVD dst src)); 4954 match(Set dst (MulReductionVD dst src)); 4955 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4956 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4957 ins_encode %{ 4958 int opcode = this->ideal_Opcode(); 4959 int vlen = Matcher::vector_length(this, $src); 4960 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4961 %} 4962 ins_pipe( pipe_slow ); 4963 %} 4964 4965 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4966 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4967 match(Set dst (AddReductionVD dst src)); 4968 match(Set dst (MulReductionVD dst src)); 4969 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4970 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4971 ins_encode %{ 4972 int opcode = this->ideal_Opcode(); 4973 int vlen = Matcher::vector_length(this, $src); 4974 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4975 %} 4976 ins_pipe( pipe_slow ); 4977 %} 4978 4979 // =======================Byte Reduction========================================== 
#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================
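// x86 has no packed byte multiply, so the byte multiply reductions go through
// C2_MacroAssembler::mulreduceB(), which widens the bytes to words before
// multiplying.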
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
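// The rules above match reductions whose incoming scalar is the identity
// (+Inf for min, -Inf for max) and may therefore ignore $src1; the _av
// variants below take a live accumulator in $dst instead. All of them go
// through reduceFloatMinMax(), which uses compare/blend sequences to get
// Java's NaN and -0.0 semantics (plain minps/maxps does not match them),
// hence the number of TEMP registers and the flags kill.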
"vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5122 ins_encode %{ 5123 assert(UseAVX > 0, "sanity"); 5124 5125 int opcode = this->ideal_Opcode(); 5126 int vlen = Matcher::vector_length(this, $src); 5127 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5128 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5129 %} 5130 ins_pipe( pipe_slow ); 5131 %} 5132 5133 5134 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5135 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5136 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5137 Matcher::vector_length(n->in(2)) >= 4); 5138 match(Set dst (MinReductionV dst src)); 5139 match(Set dst (MaxReductionV dst src)); 5140 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5141 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5142 ins_encode %{ 5143 assert(UseAVX > 0, "sanity"); 5144 5145 int opcode = this->ideal_Opcode(); 5146 int vlen = Matcher::vector_length(this, $src); 5147 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5148 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 5153 5154 //--------------------Min Double Reduction -------------------- 5155 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5156 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5157 rFlagsReg cr) %{ 5158 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5159 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5160 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5161 Matcher::vector_length(n->in(2)) == 2); 5162 match(Set dst (MinReductionV src1 src2)); 5163 match(Set dst (MaxReductionV src1 src2)); 5164 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5165 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5166 ins_encode %{ 5167 assert(UseAVX > 0, "sanity"); 5168 5169 int opcode = this->ideal_Opcode(); 5170 int vlen = Matcher::vector_length(this, $src2); 5171 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5172 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5173 %} 5174 ins_pipe( pipe_slow ); 5175 %} 5176 5177 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5178 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5179 rFlagsReg cr) %{ 5180 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5181 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5182 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5183 Matcher::vector_length(n->in(2)) >= 4); 5184 match(Set dst (MinReductionV src1 src2)); 5185 match(Set dst (MaxReductionV src1 src2)); 5186 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5187 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5188 ins_encode %{ 5189 assert(UseAVX > 0, "sanity"); 5190 5191 int opcode = this->ideal_Opcode(); 5192 int vlen = Matcher::vector_length(this, $src2); 5193 __ reduceDoubleMinMax(opcode, vlen, false, 
instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------
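// Each arithmetic op below comes in up to three flavors: a two-operand SSE
// form (dst op= src), a three-operand AVX register form, and an AVX form
// with a memory operand. The memory forms are restricted to vectors wider
// than 8 bytes because the instruction always reads a full 16 bytes or more
// from memory.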
add packedS" %} 5283 ins_encode %{ 5284 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5285 %} 5286 ins_pipe( pipe_slow ); 5287 %} 5288 5289 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5290 predicate(UseAVX > 0); 5291 match(Set dst (AddVS src1 src2)); 5292 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5293 ins_encode %{ 5294 int vlen_enc = vector_length_encoding(this); 5295 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5296 %} 5297 ins_pipe( pipe_slow ); 5298 %} 5299 5300 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5301 predicate((UseAVX > 0) && 5302 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5303 match(Set dst (AddVS src (LoadVector mem))); 5304 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5305 ins_encode %{ 5306 int vlen_enc = vector_length_encoding(this); 5307 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5308 %} 5309 ins_pipe( pipe_slow ); 5310 %} 5311 5312 // Integers vector add 5313 instruct vaddI(vec dst, vec src) %{ 5314 predicate(UseAVX == 0); 5315 match(Set dst (AddVI dst src)); 5316 format %{ "paddd $dst,$src\t! add packedI" %} 5317 ins_encode %{ 5318 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5319 %} 5320 ins_pipe( pipe_slow ); 5321 %} 5322 5323 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5324 predicate(UseAVX > 0); 5325 match(Set dst (AddVI src1 src2)); 5326 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5327 ins_encode %{ 5328 int vlen_enc = vector_length_encoding(this); 5329 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5330 %} 5331 ins_pipe( pipe_slow ); 5332 %} 5333 5334 5335 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5336 predicate((UseAVX > 0) && 5337 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5338 match(Set dst (AddVI src (LoadVector mem))); 5339 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5340 ins_encode %{ 5341 int vlen_enc = vector_length_encoding(this); 5342 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5343 %} 5344 ins_pipe( pipe_slow ); 5345 %} 5346 5347 // Longs vector add 5348 instruct vaddL(vec dst, vec src) %{ 5349 predicate(UseAVX == 0); 5350 match(Set dst (AddVL dst src)); 5351 format %{ "paddq $dst,$src\t! add packedL" %} 5352 ins_encode %{ 5353 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5354 %} 5355 ins_pipe( pipe_slow ); 5356 %} 5357 5358 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5359 predicate(UseAVX > 0); 5360 match(Set dst (AddVL src1 src2)); 5361 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5362 ins_encode %{ 5363 int vlen_enc = vector_length_encoding(this); 5364 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5365 %} 5366 ins_pipe( pipe_slow ); 5367 %} 5368 5369 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5370 predicate((UseAVX > 0) && 5371 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5372 match(Set dst (AddVL src (LoadVector mem))); 5373 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5374 ins_encode %{ 5375 int vlen_enc = vector_length_encoding(this); 5376 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 %} 5380 5381 // Floats vector add 5382 instruct vaddF(vec dst, vec src) %{ 5383 predicate(UseAVX == 0); 5384 match(Set dst (AddVF dst src)); 5385 format %{ "addps $dst,$src\t! 
add packedF" %} 5386 ins_encode %{ 5387 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5388 %} 5389 ins_pipe( pipe_slow ); 5390 %} 5391 5392 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5393 predicate(UseAVX > 0); 5394 match(Set dst (AddVF src1 src2)); 5395 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5396 ins_encode %{ 5397 int vlen_enc = vector_length_encoding(this); 5398 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5399 %} 5400 ins_pipe( pipe_slow ); 5401 %} 5402 5403 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5404 predicate((UseAVX > 0) && 5405 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5406 match(Set dst (AddVF src (LoadVector mem))); 5407 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5408 ins_encode %{ 5409 int vlen_enc = vector_length_encoding(this); 5410 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5411 %} 5412 ins_pipe( pipe_slow ); 5413 %} 5414 5415 // Doubles vector add 5416 instruct vaddD(vec dst, vec src) %{ 5417 predicate(UseAVX == 0); 5418 match(Set dst (AddVD dst src)); 5419 format %{ "addpd $dst,$src\t! add packedD" %} 5420 ins_encode %{ 5421 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5427 predicate(UseAVX > 0); 5428 match(Set dst (AddVD src1 src2)); 5429 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5430 ins_encode %{ 5431 int vlen_enc = vector_length_encoding(this); 5432 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5433 %} 5434 ins_pipe( pipe_slow ); 5435 %} 5436 5437 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5438 predicate((UseAVX > 0) && 5439 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5440 match(Set dst (AddVD src (LoadVector mem))); 5441 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5442 ins_encode %{ 5443 int vlen_enc = vector_length_encoding(this); 5444 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5445 %} 5446 ins_pipe( pipe_slow ); 5447 %} 5448 5449 // --------------------------------- SUB -------------------------------------- 5450 5451 // Bytes vector sub 5452 instruct vsubB(vec dst, vec src) %{ 5453 predicate(UseAVX == 0); 5454 match(Set dst (SubVB dst src)); 5455 format %{ "psubb $dst,$src\t! sub packedB" %} 5456 ins_encode %{ 5457 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5458 %} 5459 ins_pipe( pipe_slow ); 5460 %} 5461 5462 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5463 predicate(UseAVX > 0); 5464 match(Set dst (SubVB src1 src2)); 5465 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5466 ins_encode %{ 5467 int vlen_enc = vector_length_encoding(this); 5468 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5469 %} 5470 ins_pipe( pipe_slow ); 5471 %} 5472 5473 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5474 predicate((UseAVX > 0) && 5475 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5476 match(Set dst (SubVB src (LoadVector mem))); 5477 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5478 ins_encode %{ 5479 int vlen_enc = vector_length_encoding(this); 5480 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5481 %} 5482 ins_pipe( pipe_slow ); 5483 %} 5484 5485 // Shorts/Chars vector sub 5486 instruct vsubS(vec dst, vec src) %{ 5487 predicate(UseAVX == 0); 5488 match(Set dst (SubVS dst src)); 5489 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5490 ins_encode %{ 5491 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5492 %} 5493 ins_pipe( pipe_slow ); 5494 %} 5495 5496 5497 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5498 predicate(UseAVX > 0); 5499 match(Set dst (SubVS src1 src2)); 5500 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5501 ins_encode %{ 5502 int vlen_enc = vector_length_encoding(this); 5503 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5509 predicate((UseAVX > 0) && 5510 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5511 match(Set dst (SubVS src (LoadVector mem))); 5512 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5513 ins_encode %{ 5514 int vlen_enc = vector_length_encoding(this); 5515 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 // Integers vector sub 5521 instruct vsubI(vec dst, vec src) %{ 5522 predicate(UseAVX == 0); 5523 match(Set dst (SubVI dst src)); 5524 format %{ "psubd $dst,$src\t! sub packedI" %} 5525 ins_encode %{ 5526 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5527 %} 5528 ins_pipe( pipe_slow ); 5529 %} 5530 5531 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5532 predicate(UseAVX > 0); 5533 match(Set dst (SubVI src1 src2)); 5534 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5535 ins_encode %{ 5536 int vlen_enc = vector_length_encoding(this); 5537 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5543 predicate((UseAVX > 0) && 5544 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5545 match(Set dst (SubVI src (LoadVector mem))); 5546 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5547 ins_encode %{ 5548 int vlen_enc = vector_length_encoding(this); 5549 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 // Longs vector sub 5555 instruct vsubL(vec dst, vec src) %{ 5556 predicate(UseAVX == 0); 5557 match(Set dst (SubVL dst src)); 5558 format %{ "psubq $dst,$src\t! sub packedL" %} 5559 ins_encode %{ 5560 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5561 %} 5562 ins_pipe( pipe_slow ); 5563 %} 5564 5565 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5566 predicate(UseAVX > 0); 5567 match(Set dst (SubVL src1 src2)); 5568 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5569 ins_encode %{ 5570 int vlen_enc = vector_length_encoding(this); 5571 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5572 %} 5573 ins_pipe( pipe_slow ); 5574 %} 5575 5576 5577 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5578 predicate((UseAVX > 0) && 5579 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5580 match(Set dst (SubVL src (LoadVector mem))); 5581 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5582 ins_encode %{ 5583 int vlen_enc = vector_length_encoding(this); 5584 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5585 %} 5586 ins_pipe( pipe_slow ); 5587 %} 5588 5589 // Floats vector sub 5590 instruct vsubF(vec dst, vec src) %{ 5591 predicate(UseAVX == 0); 5592 match(Set dst (SubVF dst src)); 5593 format %{ "subps $dst,$src\t! 
sub packedF" %} 5594 ins_encode %{ 5595 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5601 predicate(UseAVX > 0); 5602 match(Set dst (SubVF src1 src2)); 5603 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5604 ins_encode %{ 5605 int vlen_enc = vector_length_encoding(this); 5606 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5607 %} 5608 ins_pipe( pipe_slow ); 5609 %} 5610 5611 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5612 predicate((UseAVX > 0) && 5613 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5614 match(Set dst (SubVF src (LoadVector mem))); 5615 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5616 ins_encode %{ 5617 int vlen_enc = vector_length_encoding(this); 5618 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5619 %} 5620 ins_pipe( pipe_slow ); 5621 %} 5622 5623 // Doubles vector sub 5624 instruct vsubD(vec dst, vec src) %{ 5625 predicate(UseAVX == 0); 5626 match(Set dst (SubVD dst src)); 5627 format %{ "subpd $dst,$src\t! sub packedD" %} 5628 ins_encode %{ 5629 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5630 %} 5631 ins_pipe( pipe_slow ); 5632 %} 5633 5634 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5635 predicate(UseAVX > 0); 5636 match(Set dst (SubVD src1 src2)); 5637 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5638 ins_encode %{ 5639 int vlen_enc = vector_length_encoding(this); 5640 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5641 %} 5642 ins_pipe( pipe_slow ); 5643 %} 5644 5645 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5646 predicate((UseAVX > 0) && 5647 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5648 match(Set dst (SubVD src (LoadVector mem))); 5649 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5650 ins_encode %{ 5651 int vlen_enc = vector_length_encoding(this); 5652 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5653 %} 5654 ins_pipe( pipe_slow ); 5655 %} 5656 5657 // --------------------------------- MUL -------------------------------------- 5658 5659 // Byte vector mul 5660 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5661 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5662 match(Set dst (MulVB src1 src2)); 5663 effect(TEMP dst, TEMP xtmp); 5664 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5665 ins_encode %{ 5666 assert(UseSSE > 3, "required"); 5667 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5668 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5669 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5670 __ psllw($dst$$XMMRegister, 8); 5671 __ psrlw($dst$$XMMRegister, 8); 5672 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5673 %} 5674 ins_pipe( pipe_slow ); 5675 %} 5676 5677 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5678 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5679 match(Set dst (MulVB src1 src2)); 5680 effect(TEMP dst, TEMP xtmp); 5681 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5682 ins_encode %{ 5683 assert(UseSSE > 3, "required"); 5684 // Odd-index elements 5685 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5686 __ psrlw($dst$$XMMRegister, 8); 5687 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5688 __ psrlw($xtmp$$XMMRegister, 8); 5689 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5690 __ psllw($dst$$XMMRegister, 8); 5691 // Even-index elements 5692 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5693 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5694 __ psllw($xtmp$$XMMRegister, 8); 5695 __ psrlw($xtmp$$XMMRegister, 8); 5696 // Combine 5697 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5698 %} 5699 ins_pipe( pipe_slow ); 5700 %} 5701 5702 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5703 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5704 match(Set dst (MulVB src1 src2)); 5705 effect(TEMP xtmp1, TEMP xtmp2); 5706 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5707 ins_encode %{ 5708 int vlen_enc = vector_length_encoding(this); 5709 // Odd-index elements 5710 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5711 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5712 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5713 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5714 // Even-index elements 5715 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5716 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5717 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5718 // Combine 5719 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5720 %} 5721 ins_pipe( pipe_slow ); 5722 %} 5723 5724 // Shorts/Chars vector mul 5725 instruct vmulS(vec dst, vec src) %{ 5726 predicate(UseAVX == 0); 5727 match(Set dst (MulVS dst src)); 5728 format %{ "pmullw $dst,$src\t! mul packedS" %} 5729 ins_encode %{ 5730 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5731 %} 5732 ins_pipe( pipe_slow ); 5733 %} 5734 5735 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5736 predicate(UseAVX > 0); 5737 match(Set dst (MulVS src1 src2)); 5738 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5739 ins_encode %{ 5740 int vlen_enc = vector_length_encoding(this); 5741 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5742 %} 5743 ins_pipe( pipe_slow ); 5744 %} 5745 5746 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5747 predicate((UseAVX > 0) && 5748 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5749 match(Set dst (MulVS src (LoadVector mem))); 5750 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5751 ins_encode %{ 5752 int vlen_enc = vector_length_encoding(this); 5753 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5754 %} 5755 ins_pipe( pipe_slow ); 5756 %} 5757 5758 // Integers vector mul 5759 instruct vmulI(vec dst, vec src) %{ 5760 predicate(UseAVX == 0); 5761 match(Set dst (MulVI dst src)); 5762 format %{ "pmulld $dst,$src\t! mul packedI" %} 5763 ins_encode %{ 5764 assert(UseSSE > 3, "required"); 5765 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5771 predicate(UseAVX > 0); 5772 match(Set dst (MulVI src1 src2)); 5773 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packedI" %} 5774 ins_encode %{ 5775 int vlen_enc = vector_length_encoding(this); 5776 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5782 predicate((UseAVX > 0) && 5783 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5784 match(Set dst (MulVI src (LoadVector mem))); 5785 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5786 ins_encode %{ 5787 int vlen_enc = vector_length_encoding(this); 5788 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 // Longs vector mul 5794 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 5795 predicate((Matcher::vector_length_in_bytes(n) == 64 && 5796 VM_Version::supports_avx512dq()) || 5797 VM_Version::supports_avx512vldq()); 5798 match(Set dst (MulVL src1 src2)); 5799 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 5800 ins_encode %{ 5801 assert(UseAVX > 2, "required"); 5802 int vlen_enc = vector_length_encoding(this); 5803 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5804 %} 5805 ins_pipe( pipe_slow ); 5806 %} 5807 5808 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 5809 predicate((Matcher::vector_length_in_bytes(n) == 64 && 5810 VM_Version::supports_avx512dq()) || 5811 (Matcher::vector_length_in_bytes(n) > 8 && 5812 VM_Version::supports_avx512vldq())); 5813 match(Set dst (MulVL src (LoadVector mem))); 5814 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 5815 ins_encode %{ 5816 assert(UseAVX > 2, "required"); 5817 int vlen_enc = vector_length_encoding(this); 5818 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5819 %} 5820 ins_pipe( pipe_slow ); 5821 %} 5822 5823 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 5824 predicate(UseAVX == 0); 5825 match(Set dst (MulVL src1 src2)); 5826 effect(TEMP dst, TEMP xtmp); 5827 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5828 ins_encode %{ 5829 assert(VM_Version::supports_sse4_1(), "required"); 5830 // Get the lo-hi products, only the lower 32 bits is in concerns 5831 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 5832 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 5833 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 5834 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 5835 __ psllq($dst$$XMMRegister, 32); 5836 // Get the lo-lo products 5837 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5838 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 5839 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 5840 %} 5841 ins_pipe( pipe_slow ); 5842 %} 5843 5844 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5845 predicate(UseAVX > 0 && 5846 ((Matcher::vector_length_in_bytes(n) == 64 && 5847 !VM_Version::supports_avx512dq()) || 5848 (Matcher::vector_length_in_bytes(n) < 64 && 5849 !VM_Version::supports_avx512vldq()))); 5850 match(Set dst (MulVL src1 src2)); 5851 effect(TEMP xtmp1, TEMP xtmp2); 5852 format %{ "vmulVL $dst, $src1, $src2\t! 
instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits are needed.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits are needed.
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
div packedF" %} 5943 ins_encode %{ 5944 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5945 %} 5946 ins_pipe( pipe_slow ); 5947 %} 5948 5949 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5950 predicate(UseAVX > 0); 5951 match(Set dst (DivVF src1 src2)); 5952 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5953 ins_encode %{ 5954 int vlen_enc = vector_length_encoding(this); 5955 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5956 %} 5957 ins_pipe( pipe_slow ); 5958 %} 5959 5960 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5961 predicate((UseAVX > 0) && 5962 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5963 match(Set dst (DivVF src (LoadVector mem))); 5964 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 5965 ins_encode %{ 5966 int vlen_enc = vector_length_encoding(this); 5967 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5968 %} 5969 ins_pipe( pipe_slow ); 5970 %} 5971 5972 // Doubles vector div 5973 instruct vdivD(vec dst, vec src) %{ 5974 predicate(UseAVX == 0); 5975 match(Set dst (DivVD dst src)); 5976 format %{ "divpd $dst,$src\t! div packedD" %} 5977 ins_encode %{ 5978 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5979 %} 5980 ins_pipe( pipe_slow ); 5981 %} 5982 5983 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5984 predicate(UseAVX > 0); 5985 match(Set dst (DivVD src1 src2)); 5986 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5987 ins_encode %{ 5988 int vlen_enc = vector_length_encoding(this); 5989 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5995 predicate((UseAVX > 0) && 5996 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5997 match(Set dst (DivVD src (LoadVector mem))); 5998 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5999 ins_encode %{ 6000 int vlen_enc = vector_length_encoding(this); 6001 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6002 %} 6003 ins_pipe( pipe_slow ); 6004 %} 6005 6006 // ------------------------------ MinMax --------------------------------------- 6007 6008 // Byte, Short, Int vector Min/Max 6009 instruct minmax_reg_sse(vec dst, vec src) %{ 6010 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6011 UseAVX == 0); 6012 match(Set dst (MinV dst src)); 6013 match(Set dst (MaxV dst src)); 6014 format %{ "vector_minmax $dst,$src\t! " %} 6015 ins_encode %{ 6016 assert(UseSSE >= 4, "required"); 6017 6018 int opcode = this->ideal_Opcode(); 6019 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6020 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6026 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6027 UseAVX > 0); 6028 match(Set dst (MinV src1 src2)); 6029 match(Set dst (MaxV src1 src2)); 6030 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 6031 ins_encode %{ 6032 int opcode = this->ideal_Opcode(); 6033 int vlen_enc = vector_length_encoding(this); 6034 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6035 6036 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6037 %} 6038 ins_pipe( pipe_slow ); 6039 %} 6040 6041 // Long vector Min/Max 6042 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6043 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6044 UseAVX == 0); 6045 match(Set dst (MinV dst src)); 6046 match(Set dst (MaxV src dst)); 6047 effect(TEMP dst, TEMP tmp); 6048 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6049 ins_encode %{ 6050 assert(UseSSE >= 4, "required"); 6051 6052 int opcode = this->ideal_Opcode(); 6053 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6054 assert(elem_bt == T_LONG, "sanity"); 6055 6056 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6057 %} 6058 ins_pipe( pipe_slow ); 6059 %} 6060 6061 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6062 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6063 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6064 match(Set dst (MinV src1 src2)); 6065 match(Set dst (MaxV src1 src2)); 6066 effect(TEMP dst); 6067 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6068 ins_encode %{ 6069 int vlen_enc = vector_length_encoding(this); 6070 int opcode = this->ideal_Opcode(); 6071 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6072 assert(elem_bt == T_LONG, "sanity"); 6073 6074 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6075 %} 6076 ins_pipe( pipe_slow ); 6077 %} 6078 6079 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6080 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6081 Matcher::vector_element_basic_type(n) == T_LONG); 6082 match(Set dst (MinV src1 src2)); 6083 match(Set dst (MaxV src1 src2)); 6084 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6085 ins_encode %{ 6086 assert(UseAVX > 2, "required"); 6087 6088 int vlen_enc = vector_length_encoding(this); 6089 int opcode = this->ideal_Opcode(); 6090 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6091 assert(elem_bt == T_LONG, "sanity"); 6092 6093 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6094 %} 6095 ins_pipe( pipe_slow ); 6096 %} 6097 6098 // Float/Double vector Min/Max 6099 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6100 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6101 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6102 UseAVX > 0); 6103 match(Set dst (MinV a b)); 6104 match(Set dst (MaxV a b)); 6105 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6106 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6107 ins_encode %{ 6108 assert(UseAVX > 0, "required"); 6109 6110 int opcode = this->ideal_Opcode(); 6111 int vlen_enc = vector_length_encoding(this); 6112 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6113 6114 __ vminmax_fp(opcode, elem_bt, 6115 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6116 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6117 %} 6118 ins_pipe( pipe_slow ); 6119 %} 6120 6121 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6122 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6123 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6124 match(Set dst (MinV a b)); 6125 match(Set dst (MaxV a b)); 6126 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6127 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6128 ins_encode %{ 6129 assert(UseAVX > 2, "required"); 6130 6131 int opcode = this->ideal_Opcode(); 6132 int vlen_enc = vector_length_encoding(this); 6133 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6134 6135 __ evminmax_fp(opcode, elem_bt, 6136 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6137 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6138 %} 6139 ins_pipe( pipe_slow ); 6140 %} 6141 6142 // --------------------------------- Signum/CopySign --------------------------- 6143 6144 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6145 match(Set dst (SignumF dst (Binary zero one))); 6146 effect(KILL cr); 6147 format %{ "signumF $dst, $dst" %} 6148 ins_encode %{ 6149 int opcode = this->ideal_Opcode(); 6150 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6151 %} 6152 ins_pipe( pipe_slow ); 6153 %} 6154 6155 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6156 match(Set dst (SignumD dst (Binary zero one))); 6157 effect(KILL cr); 6158 format %{ "signumD $dst, $dst" %} 6159 ins_encode %{ 6160 int opcode = this->ideal_Opcode(); 6161 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6162 %} 6163 ins_pipe( pipe_slow ); 6164 %} 6165 6166 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6167 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6168 match(Set dst (SignumVF src (Binary zero one))); 6169 match(Set dst (SignumVD src (Binary zero one))); 6170 effect(TEMP dst, TEMP xtmp1); 6171 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6172 ins_encode %{ 6173 int opcode = this->ideal_Opcode(); 6174 int vec_enc = vector_length_encoding(this); 6175 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6176 $xtmp1$$XMMRegister, vec_enc); 6177 %} 6178 ins_pipe( pipe_slow ); 6179 %} 6180 6181 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6182 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6183 match(Set dst (SignumVF src (Binary zero one))); 6184 match(Set dst (SignumVD src (Binary zero one))); 6185 effect(TEMP dst, TEMP ktmp1); 6186 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6187 ins_encode %{ 6188 int opcode = this->ideal_Opcode(); 6189 int vec_enc = vector_length_encoding(this); 6190 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6191 $ktmp1$$KRegister, vec_enc); 6192 %} 6193 ins_pipe( pipe_slow ); 6194 %} 6195 6196 // --------------------------------------- 6197 // For copySign use 0xE4 as writemask for vpternlog 6198 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6199 // C (xmm2) is set to 0x7FFFFFFF 6200 // Wherever xmm2 is 0, we want to pick from B (sign) 6201 // Wherever xmm2 is 1, we want to pick from A (src) 6202 // 6203 // A B C Result 6204 // 0 0 0 0 6205 // 0 0 1 0 6206 // 0 1 0 1 6207 // 0 1 1 0 6208 // 1 0 0 0 6209 // 1 0 1 1 6210 // 1 1 0 1 6211 // 1 1 1 1 6212 // 6213 // Result going from high bit to low bit is 0x11100100 = 0xe4 6214 // --------------------------------------- 6215 6216 #ifdef _LP64 6217 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6218 match(Set dst (CopySignF dst src)); 6219 effect(TEMP tmp1, TEMP tmp2); 6220 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6221 ins_encode %{ 6222 __ movl($tmp2$$Register, 0x7FFFFFFF); 6223 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6224 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6225 %} 6226 ins_pipe( pipe_slow ); 6227 %} 6228 6229 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6230 match(Set dst (CopySignD dst (Binary src zero))); 6231 ins_cost(100); 6232 effect(TEMP tmp1, TEMP tmp2); 6233 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6234 ins_encode %{ 6235 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6236 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6237 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6238 %} 6239 ins_pipe( pipe_slow ); 6240 %} 6241 6242 #endif // _LP64 6243 6244 //----------------------------- CompressBits/ExpandBits ------------------------ 6245 6246 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6247 predicate(n->bottom_type()->isa_int()); 6248 match(Set dst (CompressBits src mask)); 6249 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6250 ins_encode %{ 6251 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6252 %} 6253 ins_pipe( pipe_slow ); 6254 %} 6255 6256 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6257 predicate(n->bottom_type()->isa_int()); 6258 match(Set dst (ExpandBits src mask)); 6259 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6260 ins_encode %{ 6261 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6262 %} 6263 ins_pipe( pipe_slow ); 6264 %} 6265 6266 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6267 predicate(n->bottom_type()->isa_int()); 6268 match(Set dst (CompressBits src (LoadI mask))); 6269 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6270 ins_encode %{ 6271 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6275 6276 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6277 predicate(n->bottom_type()->isa_int()); 6278 match(Set dst (ExpandBits src (LoadI mask))); 6279 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6280 ins_encode %{ 6281 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6282 %} 6283 ins_pipe( pipe_slow ); 6284 %} 6285 6286 // --------------------------------- Sqrt -------------------------------------- 6287 6288 instruct vsqrtF_reg(vec dst, vec src) %{ 6289 match(Set dst (SqrtVF src)); 6290 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6291 ins_encode %{ 6292 assert(UseAVX > 0, "required"); 6293 int vlen_enc = vector_length_encoding(this); 6294 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6295 %} 6296 ins_pipe( pipe_slow ); 6297 %} 6298 6299 instruct vsqrtF_mem(vec dst, memory mem) %{ 6300 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6301 match(Set dst (SqrtVF (LoadVector mem))); 6302 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6303 ins_encode %{ 6304 assert(UseAVX > 0, "required"); 6305 int vlen_enc = vector_length_encoding(this); 6306 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6307 %} 6308 ins_pipe( pipe_slow ); 6309 %} 6310 6311 // Floating point vector sqrt 6312 instruct vsqrtD_reg(vec dst, vec src) %{ 6313 match(Set dst (SqrtVD src)); 6314 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6315 ins_encode %{ 6316 assert(UseAVX > 0, "required"); 6317 int vlen_enc = vector_length_encoding(this); 6318 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6319 %} 6320 ins_pipe( pipe_slow ); 6321 %} 6322 6323 instruct vsqrtD_mem(vec dst, memory mem) %{ 6324 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6325 match(Set dst (SqrtVD (LoadVector mem))); 6326 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6327 ins_encode %{ 6328 assert(UseAVX > 0, "required"); 6329 int vlen_enc = vector_length_encoding(this); 6330 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6331 %} 6332 ins_pipe( pipe_slow ); 6333 %} 6334 6335 // ------------------------------ Shift --------------------------------------- 6336 6337 // Left and right shift count vectors are the same on x86 6338 // (only lowest bits of xmm reg are used for count). 6339 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6340 match(Set dst (LShiftCntV cnt)); 6341 match(Set dst (RShiftCntV cnt)); 6342 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6343 ins_encode %{ 6344 __ movdl($dst$$XMMRegister, $cnt$$Register); 6345 %} 6346 ins_pipe( pipe_slow ); 6347 %} 6348 6349 // Byte vector shift 6350 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6351 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6352 match(Set dst ( LShiftVB src shift)); 6353 match(Set dst ( RShiftVB src shift)); 6354 match(Set dst (URShiftVB src shift)); 6355 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6356 format %{"vector_byte_shift $dst,$src,$shift" %} 6357 ins_encode %{ 6358 assert(UseSSE > 3, "required"); 6359 int opcode = this->ideal_Opcode(); 6360 bool sign = (opcode != Op_URShiftVB); 6361 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6362 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6363 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6364 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6365 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6366 %} 6367 ins_pipe( pipe_slow ); 6368 %} 6369 6370 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6371 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6372 UseAVX <= 1); 6373 match(Set dst ( LShiftVB src shift)); 6374 match(Set dst ( RShiftVB src shift)); 6375 match(Set dst (URShiftVB src shift)); 6376 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6377 format %{"vector_byte_shift $dst,$src,$shift" %} 6378 ins_encode %{ 6379 assert(UseSSE > 3, "required"); 6380 int opcode = this->ideal_Opcode(); 6381 bool sign = (opcode != Op_URShiftVB); 6382 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6383 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6384 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6385 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6386 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6387 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6388 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6389 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6390 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6391 %} 6392 ins_pipe( pipe_slow ); 6393 %} 6394 6395 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6396 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6397 UseAVX > 1); 6398 match(Set dst ( LShiftVB src shift)); 6399 match(Set dst ( RShiftVB src shift)); 6400 match(Set dst (URShiftVB src shift)); 6401 effect(TEMP dst, TEMP tmp); 6402 format %{"vector_byte_shift $dst,$src,$shift" %} 6403 ins_encode %{ 6404 int opcode = this->ideal_Opcode(); 6405 bool sign = (opcode != Op_URShiftVB); 6406 int vlen_enc = Assembler::AVX_256bit; 6407 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6408 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6409 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6410 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6411 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6417 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6418 match(Set dst ( LShiftVB src shift)); 6419 match(Set dst ( RShiftVB src shift)); 6420 match(Set dst (URShiftVB src shift)); 6421 effect(TEMP 
dst, TEMP tmp); 6422 format %{"vector_byte_shift $dst,$src,$shift" %} 6423 ins_encode %{ 6424 assert(UseAVX > 1, "required"); 6425 int opcode = this->ideal_Opcode(); 6426 bool sign = (opcode != Op_URShiftVB); 6427 int vlen_enc = Assembler::AVX_256bit; 6428 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6429 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6430 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6431 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6432 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6433 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6434 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6435 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6436 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6437 %} 6438 ins_pipe( pipe_slow ); 6439 %} 6440 6441 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6442 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6443 match(Set dst ( LShiftVB src shift)); 6444 match(Set dst ( RShiftVB src shift)); 6445 match(Set dst (URShiftVB src shift)); 6446 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6447 format %{"vector_byte_shift $dst,$src,$shift" %} 6448 ins_encode %{ 6449 assert(UseAVX > 2, "required"); 6450 int opcode = this->ideal_Opcode(); 6451 bool sign = (opcode != Op_URShiftVB); 6452 int vlen_enc = Assembler::AVX_512bit; 6453 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6454 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6455 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6456 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6457 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6458 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6459 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6460 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6461 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6462 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6463 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6464 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6465 %} 6466 ins_pipe( pipe_slow ); 6467 %} 6468 6469 // A Shorts vector logical right shift produces an incorrect Java result 6470 // for negative data, because Java code converts a short value into an int 6471 // with sign extension before shifting. Char vectors are fine, though, since 6472 // chars are unsigned values.
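// For example, with s = (short)0xFFFC (i.e. -4), Java evaluates s >>> 1 as
// ((int)s) >>> 1 = 0xFFFFFFFC >>> 1 = 0x7FFFFFFE, whose low 16 bits are
// 0xFFFE, while a plain 16-bit logical shift would produce 0x7FFE. For a
// char c = (char)0xFFFC both routes agree on 0x7FFE, since chars
// zero-extend.
6473 // Shorts/Chars vector shift 6474 instruct vshiftS(vec dst, vec src, vec shift) %{ 6475 predicate(!n->as_ShiftV()->is_var_shift()); 6476 match(Set dst ( LShiftVS src shift)); 6477 match(Set dst ( RShiftVS src shift)); 6478 match(Set dst (URShiftVS src shift)); 6479 effect(TEMP dst, USE src, USE shift); 6480 format %{ "vshiftw $dst,$src,$shift\t! 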
shift packedS" %} 6481 ins_encode %{ 6482 int opcode = this->ideal_Opcode(); 6483 if (UseAVX > 0) { 6484 int vlen_enc = vector_length_encoding(this); 6485 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6486 } else { 6487 int vlen = Matcher::vector_length(this); 6488 if (vlen == 2) { 6489 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6490 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6491 } else if (vlen == 4) { 6492 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6493 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6494 } else { 6495 assert (vlen == 8, "sanity"); 6496 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6497 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6498 } 6499 } 6500 %} 6501 ins_pipe( pipe_slow ); 6502 %} 6503 6504 // Integers vector left shift 6505 instruct vshiftI(vec dst, vec src, vec shift) %{ 6506 predicate(!n->as_ShiftV()->is_var_shift()); 6507 match(Set dst ( LShiftVI src shift)); 6508 match(Set dst ( RShiftVI src shift)); 6509 match(Set dst (URShiftVI src shift)); 6510 effect(TEMP dst, USE src, USE shift); 6511 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6512 ins_encode %{ 6513 int opcode = this->ideal_Opcode(); 6514 if (UseAVX > 0) { 6515 int vlen_enc = vector_length_encoding(this); 6516 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6517 } else { 6518 int vlen = Matcher::vector_length(this); 6519 if (vlen == 2) { 6520 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6521 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6522 } else { 6523 assert(vlen == 4, "sanity"); 6524 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6525 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6526 } 6527 } 6528 %} 6529 ins_pipe( pipe_slow ); 6530 %} 6531 6532 // Integers vector left constant shift 6533 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6534 match(Set dst (LShiftVI src (LShiftCntV shift))); 6535 match(Set dst (RShiftVI src (RShiftCntV shift))); 6536 match(Set dst (URShiftVI src (RShiftCntV shift))); 6537 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6538 ins_encode %{ 6539 int opcode = this->ideal_Opcode(); 6540 if (UseAVX > 0) { 6541 int vector_len = vector_length_encoding(this); 6542 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6543 } else { 6544 int vlen = Matcher::vector_length(this); 6545 if (vlen == 2) { 6546 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6547 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6548 } else { 6549 assert(vlen == 4, "sanity"); 6550 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6551 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6552 } 6553 } 6554 %} 6555 ins_pipe( pipe_slow ); 6556 %} 6557 6558 // Longs vector shift 6559 instruct vshiftL(vec dst, vec src, vec shift) %{ 6560 predicate(!n->as_ShiftV()->is_var_shift()); 6561 match(Set dst ( LShiftVL src shift)); 6562 match(Set dst (URShiftVL src shift)); 6563 effect(TEMP dst, USE src, USE shift); 6564 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6565 ins_encode %{ 6566 int opcode = this->ideal_Opcode(); 6567 if (UseAVX > 0) { 6568 int vlen_enc = vector_length_encoding(this); 6569 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6570 } else { 6571 assert(Matcher::vector_length(this) == 2, ""); 6572 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6573 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6574 } 6575 %} 6576 ins_pipe( pipe_slow ); 6577 %} 6578 6579 // Longs vector constant shift 6580 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6581 match(Set dst (LShiftVL src (LShiftCntV shift))); 6582 match(Set dst (URShiftVL src (RShiftCntV shift))); 6583 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6584 ins_encode %{ 6585 int opcode = this->ideal_Opcode(); 6586 if (UseAVX > 0) { 6587 int vector_len = vector_length_encoding(this); 6588 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6589 } else { 6590 assert(Matcher::vector_length(this) == 2, ""); 6591 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6592 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6593 } 6594 %} 6595 ins_pipe( pipe_slow ); 6596 %} 6597 6598 // -------------------ArithmeticRightShift ----------------------------------- 6599 // Long vector arithmetic right shift 6600 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6601 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6602 match(Set dst (RShiftVL src shift)); 6603 effect(TEMP dst, TEMP tmp); 6604 format %{ "vshiftq $dst,$src,$shift" %} 6605 ins_encode %{ 6606 uint vlen = Matcher::vector_length(this); 6607 if (vlen == 2) { 6608 assert(UseSSE >= 2, "required"); 6609 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6610 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6611 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6612 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6613 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6614 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6615 } else { 6616 assert(vlen == 4, "sanity"); 6617 assert(UseAVX > 1, "required"); 6618 int vlen_enc = Assembler::AVX_256bit; 6619 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6620 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6621 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6622 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6623 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6624 } 6625 %} 6626 ins_pipe( pipe_slow ); 6627 %} 6628 6629 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6630 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6631 match(Set dst (RShiftVL src shift)); 6632 format %{ "vshiftq $dst,$src,$shift" %} 6633 ins_encode %{ 6634 int vlen_enc = vector_length_encoding(this); 6635 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6636 %} 6637 ins_pipe( pipe_slow ); 6638 %} 6639 6640 // ------------------- Variable Shift ----------------------------- 6641 // Byte variable shift 6642 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6643 predicate(Matcher::vector_length(n) <= 8 && 6644 n->as_ShiftV()->is_var_shift() && 6645 !VM_Version::supports_avx512bw()); 6646 match(Set dst ( LShiftVB src shift)); 6647 match(Set dst ( RShiftVB src shift)); 6648 match(Set dst (URShiftVB src shift)); 
6649 effect(TEMP dst, TEMP vtmp); 6650 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6651 ins_encode %{ 6652 assert(UseAVX >= 2, "required"); 6653 6654 int opcode = this->ideal_Opcode(); 6655 int vlen_enc = Assembler::AVX_128bit; 6656 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6657 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6658 %} 6659 ins_pipe( pipe_slow ); 6660 %} 6661 6662 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6663 predicate(Matcher::vector_length(n) == 16 && 6664 n->as_ShiftV()->is_var_shift() && 6665 !VM_Version::supports_avx512bw()); 6666 match(Set dst ( LShiftVB src shift)); 6667 match(Set dst ( RShiftVB src shift)); 6668 match(Set dst (URShiftVB src shift)); 6669 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6670 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6671 ins_encode %{ 6672 assert(UseAVX >= 2, "required"); 6673 6674 int opcode = this->ideal_Opcode(); 6675 int vlen_enc = Assembler::AVX_128bit; 6676 // Shift lower half and get word result in dst 6677 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6678 6679 // Shift upper half and get word result in vtmp1 6680 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6681 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6682 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6683 6684 // Merge and down convert the two word results to byte in dst 6685 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6686 %} 6687 ins_pipe( pipe_slow ); 6688 %} 6689 6690 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 6691 predicate(Matcher::vector_length(n) == 32 && 6692 n->as_ShiftV()->is_var_shift() && 6693 !VM_Version::supports_avx512bw()); 6694 match(Set dst ( LShiftVB src shift)); 6695 match(Set dst ( RShiftVB src shift)); 6696 match(Set dst (URShiftVB src shift)); 6697 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 6698 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 6699 ins_encode %{ 6700 assert(UseAVX >= 2, "required"); 6701 6702 int opcode = this->ideal_Opcode(); 6703 int vlen_enc = Assembler::AVX_128bit; 6704 // Process lower 128 bits and get result in dst 6705 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6706 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6707 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6708 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6709 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6710 6711 // Process higher 128 bits and get result in vtmp3 6712 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6713 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6714 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 6715 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6716 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6717 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6718 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6719 6720 // Merge the two results in dst 6721 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6722 %} 6723 ins_pipe( pipe_slow ); 6724 %} 6725 6726 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 6727 predicate(Matcher::vector_length(n) <= 32 && 6728 n->as_ShiftV()->is_var_shift() && 6729 VM_Version::supports_avx512bw()); 6730 match(Set dst ( LShiftVB src shift)); 6731 match(Set dst ( RShiftVB src shift)); 6732 match(Set dst (URShiftVB src shift)); 6733 effect(TEMP dst, TEMP vtmp); 6734 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6735 ins_encode %{ 6736 assert(UseAVX > 2, "required"); 6737 6738 int opcode = this->ideal_Opcode(); 6739 int vlen_enc = vector_length_encoding(this); 6740 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6741 %} 6742 ins_pipe( pipe_slow ); 6743 %} 6744 6745 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6746 predicate(Matcher::vector_length(n) == 64 && 6747 n->as_ShiftV()->is_var_shift() && 6748 VM_Version::supports_avx512bw()); 6749 match(Set dst ( LShiftVB src shift)); 6750 match(Set dst ( RShiftVB src shift)); 6751 match(Set dst (URShiftVB src shift)); 6752 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6753 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6754 ins_encode %{ 6755 assert(UseAVX > 2, "required"); 6756 6757 int opcode = this->ideal_Opcode(); 6758 int vlen_enc = Assembler::AVX_256bit; 6759 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6760 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6761 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6762 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6763 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6764 %} 6765 ins_pipe( pipe_slow ); 6766 %} 6767 6768 // Short variable shift 6769 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6770 predicate(Matcher::vector_length(n) <= 8 && 6771 n->as_ShiftV()->is_var_shift() && 6772 !VM_Version::supports_avx512bw()); 6773 match(Set dst ( LShiftVS src shift)); 6774 match(Set dst ( RShiftVS src shift)); 6775 match(Set dst (URShiftVS src shift)); 6776 effect(TEMP dst, TEMP vtmp); 6777 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6778 ins_encode %{ 6779 assert(UseAVX >= 2, "required"); 6780 6781 int opcode = this->ideal_Opcode(); 6782 bool sign = (opcode != Op_URShiftVS); 6783 int vlen_enc = Assembler::AVX_256bit; 6784 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6785 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6786 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6787 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6788 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6789 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6790 %} 6791 ins_pipe( pipe_slow ); 6792 %} 6793 6794 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6795 predicate(Matcher::vector_length(n) == 16 && 
6796 n->as_ShiftV()->is_var_shift() && 6797 !VM_Version::supports_avx512bw()); 6798 match(Set dst ( LShiftVS src shift)); 6799 match(Set dst ( RShiftVS src shift)); 6800 match(Set dst (URShiftVS src shift)); 6801 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6802 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6803 ins_encode %{ 6804 assert(UseAVX >= 2, "required"); 6805 6806 int opcode = this->ideal_Opcode(); 6807 bool sign = (opcode != Op_URShiftVS); 6808 int vlen_enc = Assembler::AVX_256bit; 6809 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 6810 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6811 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6812 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6813 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6814 6815 // Shift upper half, with result in dst using vtmp1 as TEMP 6816 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6817 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6818 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6819 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6820 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6821 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6822 6823 // Merge lower and upper half result into dst 6824 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6825 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6826 %} 6827 ins_pipe( pipe_slow ); 6828 %} 6829 6830 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6831 predicate(n->as_ShiftV()->is_var_shift() && 6832 VM_Version::supports_avx512bw()); 6833 match(Set dst ( LShiftVS src shift)); 6834 match(Set dst ( RShiftVS src shift)); 6835 match(Set dst (URShiftVS src shift)); 6836 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6837 ins_encode %{ 6838 assert(UseAVX > 2, "required"); 6839 6840 int opcode = this->ideal_Opcode(); 6841 int vlen_enc = vector_length_encoding(this); 6842 if (!VM_Version::supports_avx512vl()) { 6843 vlen_enc = Assembler::AVX_512bit; 6844 } 6845 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6846 %} 6847 ins_pipe( pipe_slow ); 6848 %} 6849 6850 //Integer variable shift 6851 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6852 predicate(n->as_ShiftV()->is_var_shift()); 6853 match(Set dst ( LShiftVI src shift)); 6854 match(Set dst ( RShiftVI src shift)); 6855 match(Set dst (URShiftVI src shift)); 6856 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6857 ins_encode %{ 6858 assert(UseAVX >= 2, "required"); 6859 6860 int opcode = this->ideal_Opcode(); 6861 int vlen_enc = vector_length_encoding(this); 6862 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6863 %} 6864 ins_pipe( pipe_slow ); 6865 %} 6866 6867 //Long variable shift 6868 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6869 predicate(n->as_ShiftV()->is_var_shift()); 6870 match(Set dst ( LShiftVL src shift)); 6871 match(Set dst (URShiftVL src shift)); 6872 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 6873 ins_encode %{ 6874 assert(UseAVX >= 2, "required"); 6875 6876 int opcode = this->ideal_Opcode(); 6877 int vlen_enc = vector_length_encoding(this); 6878 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6879 %} 6880 ins_pipe( pipe_slow ); 6881 %} 6882 6883 // Long variable arithmetic right shift 6884 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6885 predicate(Matcher::vector_length(n) <= 4 && 6886 n->as_ShiftV()->is_var_shift() && 6887 UseAVX == 2); 6888 match(Set dst (RShiftVL src shift)); 6889 effect(TEMP dst, TEMP vtmp); 6890 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 6891 ins_encode %{ 6892 int opcode = this->ideal_Opcode(); 6893 int vlen_enc = vector_length_encoding(this); 6894 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6895 $vtmp$$XMMRegister); 6896 %} 6897 ins_pipe( pipe_slow ); 6898 %} 6899 6900 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6901 predicate(n->as_ShiftV()->is_var_shift() && 6902 UseAVX > 2); 6903 match(Set dst (RShiftVL src shift)); 6904 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 6905 ins_encode %{ 6906 int opcode = this->ideal_Opcode(); 6907 int vlen_enc = vector_length_encoding(this); 6908 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6909 %} 6910 ins_pipe( pipe_slow ); 6911 %} 6912 6913 // --------------------------------- AND -------------------------------------- 6914 6915 instruct vand(vec dst, vec src) %{ 6916 predicate(UseAVX == 0); 6917 match(Set dst (AndV dst src)); 6918 format %{ "pand $dst,$src\t! and vectors" %} 6919 ins_encode %{ 6920 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6921 %} 6922 ins_pipe( pipe_slow ); 6923 %} 6924 6925 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6926 predicate(UseAVX > 0); 6927 match(Set dst (AndV src1 src2)); 6928 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6929 ins_encode %{ 6930 int vlen_enc = vector_length_encoding(this); 6931 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6932 %} 6933 ins_pipe( pipe_slow ); 6934 %} 6935 6936 instruct vand_mem(vec dst, vec src, memory mem) %{ 6937 predicate((UseAVX > 0) && 6938 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6939 match(Set dst (AndV src (LoadVector mem))); 6940 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6941 ins_encode %{ 6942 int vlen_enc = vector_length_encoding(this); 6943 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6944 %} 6945 ins_pipe( pipe_slow ); 6946 %} 6947 6948 // --------------------------------- OR --------------------------------------- 6949 6950 instruct vor(vec dst, vec src) %{ 6951 predicate(UseAVX == 0); 6952 match(Set dst (OrV dst src)); 6953 format %{ "por $dst,$src\t! or vectors" %} 6954 ins_encode %{ 6955 __ por($dst$$XMMRegister, $src$$XMMRegister); 6956 %} 6957 ins_pipe( pipe_slow ); 6958 %} 6959 6960 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6961 predicate(UseAVX > 0); 6962 match(Set dst (OrV src1 src2)); 6963 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 6964 ins_encode %{ 6965 int vlen_enc = vector_length_encoding(this); 6966 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6967 %} 6968 ins_pipe( pipe_slow ); 6969 %} 6970 6971 instruct vor_mem(vec dst, vec src, memory mem) %{ 6972 predicate((UseAVX > 0) && 6973 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6974 match(Set dst (OrV src (LoadVector mem))); 6975 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6976 ins_encode %{ 6977 int vlen_enc = vector_length_encoding(this); 6978 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6979 %} 6980 ins_pipe( pipe_slow ); 6981 %} 6982 6983 // --------------------------------- XOR -------------------------------------- 6984 6985 instruct vxor(vec dst, vec src) %{ 6986 predicate(UseAVX == 0); 6987 match(Set dst (XorV dst src)); 6988 format %{ "pxor $dst,$src\t! xor vectors" %} 6989 ins_encode %{ 6990 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6991 %} 6992 ins_pipe( pipe_slow ); 6993 %} 6994 6995 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6996 predicate(UseAVX > 0); 6997 match(Set dst (XorV src1 src2)); 6998 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6999 ins_encode %{ 7000 int vlen_enc = vector_length_encoding(this); 7001 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7002 %} 7003 ins_pipe( pipe_slow ); 7004 %} 7005 7006 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7007 predicate((UseAVX > 0) && 7008 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7009 match(Set dst (XorV src (LoadVector mem))); 7010 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7011 ins_encode %{ 7012 int vlen_enc = vector_length_encoding(this); 7013 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7014 %} 7015 ins_pipe( pipe_slow ); 7016 %} 7017 7018 // --------------------------------- VectorCast -------------------------------------- 7019 7020 instruct vcastBtoX(vec dst, vec src) %{ 7021 match(Set dst (VectorCastB2X src)); 7022 format %{ "vector_cast_b2x $dst,$src\t!" %} 7023 ins_encode %{ 7024 assert(UseAVX > 0, "required"); 7025 7026 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7027 int vlen_enc = vector_length_encoding(this); 7028 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7029 %} 7030 ins_pipe( pipe_slow ); 7031 %} 7032 7033 instruct castStoX(vec dst, vec src) %{ 7034 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7035 Matcher::vector_length(n->in(1)) <= 8 && // src 7036 Matcher::vector_element_basic_type(n) == T_BYTE); 7037 match(Set dst (VectorCastS2X src)); 7038 format %{ "vector_cast_s2x $dst,$src" %} 7039 ins_encode %{ 7040 assert(UseAVX > 0, "required"); 7041 7042 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7043 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7044 %} 7045 ins_pipe( pipe_slow ); 7046 %} 7047 7048 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7049 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7050 Matcher::vector_length(n->in(1)) == 16 && // src 7051 Matcher::vector_element_basic_type(n) == T_BYTE); 7052 effect(TEMP dst, TEMP vtmp); 7053 match(Set dst (VectorCastS2X src)); 7054 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7055 ins_encode %{ 7056 assert(UseAVX > 0, "required"); 7057 7058 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7059 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7060 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7061 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7062 %} 7063 ins_pipe( pipe_slow ); 7064 %} 7065 7066 instruct vcastStoX_evex(vec dst, vec src) %{ 7067 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7068 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7069 match(Set dst (VectorCastS2X src)); 7070 format %{ "vector_cast_s2x $dst,$src\t!" %} 7071 ins_encode %{ 7072 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7073 int src_vlen_enc = vector_length_encoding(this, $src); 7074 int vlen_enc = vector_length_encoding(this); 7075 switch (to_elem_bt) { 7076 case T_BYTE: 7077 if (!VM_Version::supports_avx512vl()) { 7078 vlen_enc = Assembler::AVX_512bit; 7079 } 7080 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7081 break; 7082 case T_INT: 7083 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7084 break; 7085 case T_FLOAT: 7086 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7087 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7088 break; 7089 case T_LONG: 7090 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7091 break; 7092 case T_DOUBLE: { 7093 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7094 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7095 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7096 break; 7097 } 7098 default: 7099 ShouldNotReachHere(); 7100 } 7101 %} 7102 ins_pipe( pipe_slow ); 7103 %} 7104 7105 instruct castItoX(vec dst, vec src) %{ 7106 predicate(UseAVX <= 2 && 7107 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7108 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7109 match(Set dst (VectorCastI2X src)); 7110 format %{ "vector_cast_i2x $dst,$src" %} 7111 ins_encode %{ 7112 assert(UseAVX > 0, "required"); 7113 7114 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7115 int vlen_enc = vector_length_encoding(this, $src); 7116 7117 if (to_elem_bt == T_BYTE) { 7118 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7119 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7120 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7121 } else { 7122 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7123 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7124 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7125 } 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7131 predicate(UseAVX <= 2 && 7132 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7133 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7134 match(Set dst (VectorCastI2X src)); 7135 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7136 effect(TEMP dst, TEMP vtmp); 7137 ins_encode %{ 7138 assert(UseAVX > 0, "required"); 7139 7140 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7141 int vlen_enc = vector_length_encoding(this, $src); 7142 7143 if (to_elem_bt == T_BYTE) { 7144 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7145 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7146 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7147 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7148 } else { 7149 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7150 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7151 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7152 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7153 } 7154 %} 7155 ins_pipe( pipe_slow ); 7156 %} 7157 7158 instruct vcastItoX_evex(vec dst, vec src) %{ 7159 predicate(UseAVX > 2 || 7160 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7161 match(Set dst (VectorCastI2X src)); 7162 format %{ "vector_cast_i2x $dst,$src\t!" %} 7163 ins_encode %{ 7164 assert(UseAVX > 0, "required"); 7165 7166 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7167 int src_vlen_enc = vector_length_encoding(this, $src); 7168 int dst_vlen_enc = vector_length_encoding(this); 7169 switch (dst_elem_bt) { 7170 case T_BYTE: 7171 if (!VM_Version::supports_avx512vl()) { 7172 src_vlen_enc = Assembler::AVX_512bit; 7173 } 7174 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7175 break; 7176 case T_SHORT: 7177 if (!VM_Version::supports_avx512vl()) { 7178 src_vlen_enc = Assembler::AVX_512bit; 7179 } 7180 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7181 break; 7182 case T_FLOAT: 7183 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7184 break; 7185 case T_LONG: 7186 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7187 break; 7188 case T_DOUBLE: 7189 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7190 break; 7191 default: 7192 ShouldNotReachHere(); 7193 } 7194 %} 7195 ins_pipe( pipe_slow ); 7196 %} 7197 7198 instruct vcastLtoBS(vec dst, vec src) %{ 7199 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7200 UseAVX <= 2); 7201 match(Set dst (VectorCastL2X src)); 7202 format %{ "vector_cast_l2x $dst,$src" %} 7203 ins_encode %{ 7204 assert(UseAVX > 0, "required"); 7205 7206 int vlen = Matcher::vector_length_in_bytes(this, $src); 7207 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7208 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7209 : ExternalAddress(vector_int_to_short_mask()); 7210 if (vlen <= 16) { 7211 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7212 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7213 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7214 } else { 7215 assert(vlen <= 32, "required"); 7216 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7217 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7218 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7219 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7220 } 7221 if (to_elem_bt == T_BYTE) { 7222 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7223 } 7224 %} 7225 ins_pipe( pipe_slow ); 7226 %} 7227 7228 instruct vcastLtoX_evex(vec dst, vec src) %{ 7229 predicate(UseAVX > 2 || 7230 (Matcher::vector_element_basic_type(n) == T_INT || 7231 Matcher::vector_element_basic_type(n) == T_FLOAT || 7232 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7233 match(Set dst (VectorCastL2X src)); 7234 format %{ "vector_cast_l2x $dst,$src\t!" %} 7235 ins_encode %{ 7236 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7237 int vlen = Matcher::vector_length_in_bytes(this, $src); 7238 int vlen_enc = vector_length_encoding(this, $src); 7239 switch (to_elem_bt) { 7240 case T_BYTE: 7241 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7242 vlen_enc = Assembler::AVX_512bit; 7243 } 7244 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7245 break; 7246 case T_SHORT: 7247 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7248 vlen_enc = Assembler::AVX_512bit; 7249 } 7250 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7251 break; 7252 case T_INT: 7253 if (vlen == 8) { 7254 if ($dst$$XMMRegister != $src$$XMMRegister) { 7255 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7256 } 7257 } else if (vlen == 16) { 7258 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7259 } else if (vlen == 32) { 7260 if (UseAVX > 2) { 7261 if (!VM_Version::supports_avx512vl()) { 7262 vlen_enc = Assembler::AVX_512bit; 7263 } 7264 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7265 } else { 7266 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7267 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7268 } 7269 } else { // vlen == 64 7270 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7271 } 7272 break; 7273 case T_FLOAT: 7274 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7275 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7276 break; 7277 case T_DOUBLE: 7278 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7279 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7280 break; 7281 7282 default: assert(false, "%s", type2name(to_elem_bt)); 7283 } 7284 %} 7285 ins_pipe( pipe_slow ); 7286 %} 7287 7288 instruct vcastFtoD_reg(vec dst, vec src) %{ 7289 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7290 match(Set dst (VectorCastF2X src)); 7291 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7292 ins_encode %{ 7293 int vlen_enc = vector_length_encoding(this); 7294 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7295 %} 7296 ins_pipe( pipe_slow ); 7297 %} 7298 7299 7300 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7301 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7302 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7303 match(Set dst (VectorCastF2X src)); 7304 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7305 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7306 ins_encode %{ 7307 int vlen_enc = vector_length_encoding(this, $src); 7308 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7309 // JDK-8292878 removed the need for an explicit scratch register to load addresses 7310 // wider than 32 bits in register-indirect addressing mode, since stub constants 7311 // are part of the code cache and ReservedCodeCacheSize is currently capped at 2G. 7312 // Targets are free to increase this limit, but a code cache larger than 2G looks 7313 // unreasonable in practical scenarios; on the flip side, with the given cap we 7314 // save a temporary register allocation, which in the limiting case can prevent 7315 // spilling in blocks with high register pressure. 7316 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7317 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7318 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7319 %} 7320 ins_pipe( pipe_slow ); 7321 %} 7322 7323 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7324 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7325 is_integral_type(Matcher::vector_element_basic_type(n))); 7326 match(Set dst (VectorCastF2X src)); 7327 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7328 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7329 ins_encode %{ 7330 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7331 if (to_elem_bt == T_LONG) { 7332 int vlen_enc = vector_length_encoding(this); 7333 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7334 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7335 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7336 } else { 7337 int vlen_enc = vector_length_encoding(this, $src); 7338 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7339 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7340 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7341 } 7342 %} 7343 ins_pipe( pipe_slow ); 7344 %} 7345 7346 instruct vcastDtoF_reg(vec dst, vec src) %{ 7347 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7348 match(Set dst (VectorCastD2X src)); 7349 format %{ "vector_cast_d2x $dst,$src\t!" 
instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) :
                              ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
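// vector_unsigned_cast only widens, so each lane is zero-extended rather than
// sign-extended. Scalar sketch of the byte -> int case (illustrative only):
//
//   int32_t ucast_b2i(int8_t b) {
//     return (int32_t)(b & 0xFF);   // zero-extend: 0xFF becomes 255, not -1
//   }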
#ifdef _LP64
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64
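// The round instructs above implement Math.round, i.e. floor(x + 0.5): they
// temporarily switch MXCSR to round-toward-negative-infinity around the
// conversion. 0x3F80 is the default MXCSR (all exceptions masked) with the
// rounding-control field set to round-down; 0x3FBF appears to additionally
// pre-set the exception status flags, which avoids flag-update overhead on
// E-cores (hence the EnableX86ECoreOpts guard). Scalar sketch of the intended
// semantics (illustrative only):
//
//   int64_t java_round(double d) {
//     return (int64_t)floor(d + 0.5);   // ties round toward positive infinity
//   }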
// --------------------------------- VectorMaskCmp --------------------------------------

instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == NULL &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
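// In the EVEX forms above, the comparison itself produces a mask register;
// when the node expects a vector of lane-wide booleans (no vectmask type),
// the k-register is expanded by a masked load of all-ones. Conceptually,
// per lane (illustrative sketch):
//
//   dst[i] = k[i] ? 0xFFFFFFFF : 0;   // vector_all_bits_set expansion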
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
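// SSE/AVX integer compares only provide eq/lt/gt directly, so vcmp_negate
// computes the complementary predicate and inverts the all-ones/all-zeroes
// result, using $xtmp for the inversion constant:
//
//   a != b  ==>  ~(a == b)
//   a <= b  ==>  ~(a >  b)
//   a >= b  ==>  ~(a <  b)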
          using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == NULL &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}
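// vcmpu above relies on the classic sign-flip trick: pre-AVX512 hardware has
// no unsigned integer compares, but flipping the top bit of both operands
// turns an unsigned comparison into a signed one. Scalar sketch (illustrative):
//
//   bool unsigned_less(uint32_t a, uint32_t b) {
//     return (int32_t)(a ^ 0x80000000) < (int32_t)(b ^ 0x80000000);
//   }
//
// vcmp64 does not need the trick because evpcmpd/evpcmpq take an explicit
// signed/unsigned flag (the !is_unsigned argument above).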

instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Dispatch the comparison on the element basic type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------
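// A vector blend selects between two sources lane-by-lane under a mask whose
// lanes are all-ones (take the second source) or all-zeroes (keep the first).
// The byte-granular variants key off the mask's sign bit; roughly, per byte:
//
//   dst[i] = (mask[i] & 0x80) ? src[i] : dst[i];
//
// Note that the SSE4.1 pblendvb used below reads its mask implicitly from
// xmm0, which is why the first instruct pins $tmp to xmm0 via the rxmm0
// operand.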
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
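// On E-cores (EnableX86ECoreOpts) the variable blend instructions are
// comparatively slow, so vblendvp above synthesizes the blend from three
// cheap bitwise ops, using the identity (valid when mask lanes are all-ones
// or all-zeroes):
//
//   dst = (mask & src2) | (~mask & src1);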
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------
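// AbsVF/NegVF and AbsVD/NegVD are pure bit operations on the IEEE sign bit,
// implemented by vabsnegf/vabsnegd with a memory-resident mask constant.
// Per float lane (illustrative sketch):
//
//   abs: bits & 0x7FFFFFFF   // and with the sign-clear mask
//   neg: bits ^ 0x80000000   // xor with the sign-flip mask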
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

#ifdef _LP64
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
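// For short masks the ktest variants below materialize the k-register in a
// GPR and test only the masklen low bits. With n = masklen, conceptually:
//
//   anytrue:  (k & ((1 << n) - 1)) != 0
//   alltrue:  (k & ((1 << n) - 1)) == (1 << n) - 1
//
// The andl/cmpl pair leaves the result in the flags register for the
// dependent branch; kortest handles the wider masks directly.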
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------
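// VectorStoreMask narrows lane-wide boolean lanes (0 / -1) down to single
// bytes holding 0 or 1. The recurring pabsb/vpabsb at the end of each
// sequence maps -1 to 1, i.e. per byte (illustrative sketch):
//
//   out[i] = in[i] ? 1 : 0;   // |-1| == 1, |0| == 0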
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
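// PopulateIndex produces the vector { src1, src1 + 1, src1 + 2, ... } by
// broadcasting src1 and adding the iota constant (the stride $src2 is
// currently always 1, see the asserts below). Per lane (illustrative sketch):
//
//   dst[i] = src1 + i;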
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif
//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

instruct loadShuffleB(vec dst) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadShuffle dst));
  format %{ "vector_load_shuffle $dst, $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
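// vpshufb only shuffles within each 128-bit lane, so for 256-bit vectors
// without AVX512_VBMI the rearrange below shuffles twice -- once against the
// original source and once against the source with its lanes swapped -- and
// then blends the two results, selecting for every index whichever copy had
// the requested byte within reach. The desired semantics, per byte:
//
//   dst[i] = src[shuffle[i]];   // i and shuffle[i] in [0, 32)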
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short
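// Without AVX512BW, a short rearrange is done as a byte shuffle, so the
// VectorLoadShuffle below expands every short index s[i] into the two byte
// indices of that short's bytes (illustrative sketch):
//
//   byte_shuffle[2*i]     = 2 * s[i];       // first byte of the short
//   byte_shuffle[2*i + 1] = 2 * s[i] + 1;   // second byte of the short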
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
      __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c
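// The fused form rounds once: c = round(a * b + c), whereas a separate
// multiply and add would round twice. Java's Math.fma requires the single
// rounding, which is why these instructs only apply when UseFMA is enabled.
// Per lane (illustrative sketch):
//
//   c[i] = fma(a[i], b[i], c[i]);   // one rounding step, as in C99 fma()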
enabled"); 8694 int vlen_enc = vector_length_encoding(this); 8695 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8696 %} 8697 ins_pipe( pipe_slow ); 8698 %} 8699 8700 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8701 match(Set c (FmaVD c (Binary a b))); 8702 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8703 ins_cost(150); 8704 ins_encode %{ 8705 assert(UseFMA, "not enabled"); 8706 int vlen_enc = vector_length_encoding(this); 8707 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8708 %} 8709 ins_pipe( pipe_slow ); 8710 %} 8711 8712 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8713 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8714 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8715 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8716 ins_cost(150); 8717 ins_encode %{ 8718 assert(UseFMA, "not enabled"); 8719 int vlen_enc = vector_length_encoding(this); 8720 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8721 %} 8722 ins_pipe( pipe_slow ); 8723 %} 8724 8725 // --------------------------------- Vector Multiply Add -------------------------------------- 8726 8727 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8728 predicate(UseAVX == 0); 8729 match(Set dst (MulAddVS2VI dst src1)); 8730 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8731 ins_encode %{ 8732 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8733 %} 8734 ins_pipe( pipe_slow ); 8735 %} 8736 8737 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8738 predicate(UseAVX > 0); 8739 match(Set dst (MulAddVS2VI src1 src2)); 8740 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8741 ins_encode %{ 8742 int vlen_enc = vector_length_encoding(this); 8743 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8744 %} 8745 ins_pipe( pipe_slow ); 8746 %} 8747 8748 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8749 8750 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8751 predicate(VM_Version::supports_avx512_vnni()); 8752 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8753 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 8754 ins_encode %{ 8755 assert(UseAVX > 2, "required"); 8756 int vlen_enc = vector_length_encoding(this); 8757 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8758 %} 8759 ins_pipe( pipe_slow ); 8760 ins_cost(10); 8761 %} 8762 8763 // --------------------------------- PopCount -------------------------------------- 8764 8765 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 8766 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8767 match(Set dst (PopCountVI src)); 8768 match(Set dst (PopCountVL src)); 8769 format %{ "vector_popcount_integral $dst, $src" %} 8770 ins_encode %{ 8771 int opcode = this->ideal_Opcode(); 8772 int vlen_enc = vector_length_encoding(this, $src); 8773 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8774 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 8775 %} 8776 ins_pipe( pipe_slow ); 8777 %} 8778 8779 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 8780 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8781 match(Set dst (PopCountVI src mask)); 8782 match(Set dst (PopCountVL src mask)); 8783 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 8784 ins_encode %{ 8785 int vlen_enc = vector_length_encoding(this, $src); 8786 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8787 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8788 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 8789 %} 8790 ins_pipe( pipe_slow ); 8791 %} 8792 8793 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 8794 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8795 match(Set dst (PopCountVI src)); 8796 match(Set dst (PopCountVL src)); 8797 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 8798 format %{ "vector_popcount_integral $dst, $src\t! 

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
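
// Background (an assumption about the shared helper, for readability): x86 has
// no direct vector tzcnt instruction, so vector_count_trailing_zeros_evex can
// derive the count from the leading-zero count of the lowest set bit, using
// the identity (for 32-bit lanes)
//   tzcnt(x) = 31 - lzcnt(x & -x),  with tzcnt(0) = 32 handled separately.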

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
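
// Illustration of the 8-bit truth-table immediate: bit (a<<2 | b<<1 | c) of
// $func gives the result bit for input bits a (dst), b (src2), c (src3).
// For example:
//   0x96 -> dst = dst ^ src2 ^ src3                (3-input XOR)
//   0xCA -> dst = (dst & src2) | (~dst & src3)     (bitwise select on dst)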
vector masked copy" %} 8932 ins_encode %{ 8933 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8934 int vlen_enc = vector_length_encoding(this); 8935 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 8936 %} 8937 ins_pipe( pipe_slow ); 8938 %} 8939 8940 8941 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 8942 predicate(n->in(3)->bottom_type()->isa_vectmask()); 8943 match(Set dst (LoadVectorMasked mem mask)); 8944 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8945 ins_encode %{ 8946 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8947 int vector_len = vector_length_encoding(this); 8948 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 8949 %} 8950 ins_pipe( pipe_slow ); 8951 %} 8952 8953 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 8954 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8955 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8956 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8957 ins_encode %{ 8958 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8959 int vlen_enc = vector_length_encoding(src_node); 8960 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8961 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8962 %} 8963 ins_pipe( pipe_slow ); 8964 %} 8965 8966 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 8967 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8968 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8969 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8970 ins_encode %{ 8971 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8972 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8973 int vlen_enc = vector_length_encoding(src_node); 8974 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 8975 %} 8976 ins_pipe( pipe_slow ); 8977 %} 8978 8979 #ifdef _LP64 8980 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8981 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 8982 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 8983 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 8984 ins_encode %{ 8985 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 8986 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 8987 8988 Label DONE; 8989 int vlen_enc = vector_length_encoding(this, $src1); 8990 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 8991 8992 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 8993 __ mov64($dst$$Register, -1L); 8994 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 8995 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 8996 __ jccb(Assembler::carrySet, DONE); 8997 __ kmovql($dst$$Register, $ktmp1$$KRegister); 8998 __ notq($dst$$Register); 8999 __ tzcntq($dst$$Register, $dst$$Register); 9000 __ bind(DONE); 9001 %} 9002 ins_pipe( pipe_slow ); 9003 %} 9004 9005 9006 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9007 match(Set dst (VectorMaskGen len)); 9008 effect(TEMP temp, KILL cr); 9009 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9010 ins_encode %{ 9011 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9012 %} 9013 ins_pipe( pipe_slow ); 9014 %} 9015 9016 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9017 match(Set dst (VectorMaskGen len)); 9018 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9019 effect(TEMP temp); 9020 ins_encode %{ 9021 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9022 __ kmovql($dst$$KRegister, $temp$$Register); 9023 %} 9024 ins_pipe( pipe_slow ); 9025 %} 9026 9027 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9028 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9029 match(Set dst (VectorMaskToLong mask)); 9030 effect(TEMP dst, KILL cr); 9031 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9032 ins_encode %{ 9033 int opcode = this->ideal_Opcode(); 9034 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9035 int mask_len = Matcher::vector_length(this, $mask); 9036 int mask_size = mask_len * type2aelembytes(mbt); 9037 int vlen_enc = vector_length_encoding(this, $mask); 9038 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9039 $dst$$Register, mask_len, mask_size, vlen_enc); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 9043 9044 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9045 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9046 match(Set dst (VectorMaskToLong mask)); 9047 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9048 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9049 ins_encode %{ 9050 int opcode = this->ideal_Opcode(); 9051 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9052 int mask_len = Matcher::vector_length(this, $mask); 9053 int vlen_enc = vector_length_encoding(this, $mask); 9054 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9055 $dst$$Register, mask_len, mbt, vlen_enc); 9056 %} 9057 ins_pipe( pipe_slow ); 9058 %} 9059 9060 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9061 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9062 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9063 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9064 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9065 ins_encode %{ 9066 int opcode = this->ideal_Opcode(); 9067 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9068 int mask_len = Matcher::vector_length(this, $mask); 9069 int vlen_enc = vector_length_encoding(this, $mask); 9070 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9071 $dst$$Register, mask_len, mbt, vlen_enc); 9072 %} 9073 ins_pipe( pipe_slow ); 9074 %} 9075 9076 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9077 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9078 match(Set dst (VectorMaskTrueCount mask)); 9079 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9080 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9081 ins_encode %{ 9082 int opcode = this->ideal_Opcode(); 9083 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9084 int mask_len = Matcher::vector_length(this, $mask); 9085 int mask_size = mask_len * type2aelembytes(mbt); 9086 int vlen_enc = vector_length_encoding(this, $mask); 9087 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9088 $tmp$$Register, mask_len, mask_size, vlen_enc); 9089 %} 9090 ins_pipe( pipe_slow ); 9091 %} 9092 9093 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9094 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9095 match(Set dst (VectorMaskTrueCount mask)); 9096 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9097 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9098 ins_encode %{ 9099 int opcode = this->ideal_Opcode(); 9100 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9101 int mask_len = Matcher::vector_length(this, $mask); 9102 int vlen_enc = vector_length_encoding(this, $mask); 9103 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9104 $tmp$$Register, mask_len, mbt, vlen_enc); 9105 %} 9106 ins_pipe( pipe_slow ); 9107 %} 9108 9109 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9110 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9111 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9112 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9113 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9114 ins_encode %{ 9115 int opcode = this->ideal_Opcode(); 9116 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9117 int mask_len = Matcher::vector_length(this, $mask); 9118 int vlen_enc = vector_length_encoding(this, $mask); 9119 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9120 $tmp$$Register, mask_len, mbt, vlen_enc); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9126 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9127 match(Set dst (VectorMaskFirstTrue mask)); 9128 match(Set dst (VectorMaskLastTrue mask)); 9129 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9130 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9131 ins_encode %{ 9132 int opcode = this->ideal_Opcode(); 9133 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9134 int mask_len = Matcher::vector_length(this, $mask); 9135 int mask_size = mask_len * type2aelembytes(mbt); 9136 int vlen_enc = vector_length_encoding(this, $mask); 9137 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9138 $tmp$$Register, mask_len, mask_size, vlen_enc); 9139 %} 9140 ins_pipe( pipe_slow ); 9141 %} 9142 9143 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9144 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9145 match(Set dst (VectorMaskFirstTrue mask)); 9146 match(Set dst (VectorMaskLastTrue mask)); 9147 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9148 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9149 ins_encode %{ 9150 int opcode = this->ideal_Opcode(); 9151 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9152 int mask_len = Matcher::vector_length(this, $mask); 9153 int vlen_enc = vector_length_encoding(this, $mask); 9154 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9155 $tmp$$Register, mask_len, mbt, vlen_enc); 9156 %} 9157 ins_pipe( pipe_slow ); 9158 %} 9159 9160 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9161 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9162 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9163 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9164 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9165 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9166 ins_encode %{ 9167 int opcode = this->ideal_Opcode(); 9168 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9169 int mask_len = Matcher::vector_length(this, $mask); 9170 int vlen_enc = vector_length_encoding(this, $mask); 9171 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9172 $tmp$$Register, mask_len, mbt, vlen_enc); 9173 %} 9174 ins_pipe( pipe_slow ); 9175 %} 9176 9177 // --------------------------------- Compress/Expand Operations --------------------------- 9178 9179 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9180 match(Set dst (CompressV src mask)); 9181 match(Set dst (ExpandV src mask)); 9182 format %{ "vector_compress_expand $dst, $src, $mask" %} 9183 ins_encode %{ 9184 int opcode = this->ideal_Opcode(); 9185 int vector_len = vector_length_encoding(this); 9186 BasicType bt = Matcher::vector_element_basic_type(this); 9187 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9188 %} 9189 ins_pipe( pipe_slow ); 9190 %} 9191 9192 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9193 match(Set dst (CompressM mask)); 9194 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9195 format %{ "mask_compress_evex $dst, $mask\t! 

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "expected vector mask");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
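
// Note (an assumption about the constant): 0x8040201008040201 is the 8x8 bit
// matrix whose GF(2) affine transform (gf2p8affineqb) reverses the bit order
// within each byte, e.g. byte 0b0000_0110 -> 0b0110_0000; reordering the
// bytes across wider lanes is then left to the helper.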

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
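
// Note (an assumption about the AVX fallbacks below): without AVX512CD's
// vplzcnt{d,q}, a common way to count leading zeros of an int lane is to
// convert it to float (vcvtdq2ps) and recover floor(log2(x)) from the biased
// exponent, e.g. x = 8 -> exponent field 130 -> 130 - 127 = 3 -> lzcnt = 28.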

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
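
// Merge-masking illustration for the masked lanewise patterns in this section
// (the 'true' argument requests merge rather than zeroing): for AddVI with
// dst = {1, 2, 3, 4}, src2 = {10, 10, 10, 10}, mask = 0b0101,
//   dst = {1+10, 2, 3+10, 4} = {11, 2, 13, 4}
// i.e. lanes with a clear mask bit keep their previous dst value.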
xor masked operation" %} 9397 ins_encode %{ 9398 int vlen_enc = vector_length_encoding(this); 9399 BasicType bt = Matcher::vector_element_basic_type(this); 9400 int opc = this->ideal_Opcode(); 9401 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9402 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9403 %} 9404 ins_pipe( pipe_slow ); 9405 %} 9406 9407 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9408 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9409 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9410 ins_encode %{ 9411 int vlen_enc = vector_length_encoding(this); 9412 BasicType bt = Matcher::vector_element_basic_type(this); 9413 int opc = this->ideal_Opcode(); 9414 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9415 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9416 %} 9417 ins_pipe( pipe_slow ); 9418 %} 9419 9420 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9421 match(Set dst (OrV (Binary dst src2) mask)); 9422 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9423 ins_encode %{ 9424 int vlen_enc = vector_length_encoding(this); 9425 BasicType bt = Matcher::vector_element_basic_type(this); 9426 int opc = this->ideal_Opcode(); 9427 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9428 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9434 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9435 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9436 ins_encode %{ 9437 int vlen_enc = vector_length_encoding(this); 9438 BasicType bt = Matcher::vector_element_basic_type(this); 9439 int opc = this->ideal_Opcode(); 9440 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9441 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9442 %} 9443 ins_pipe( pipe_slow ); 9444 %} 9445 9446 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9447 match(Set dst (AndV (Binary dst src2) mask)); 9448 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9449 ins_encode %{ 9450 int vlen_enc = vector_length_encoding(this); 9451 BasicType bt = Matcher::vector_element_basic_type(this); 9452 int opc = this->ideal_Opcode(); 9453 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9454 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9455 %} 9456 ins_pipe( pipe_slow ); 9457 %} 9458 9459 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9460 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9461 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9462 ins_encode %{ 9463 int vlen_enc = vector_length_encoding(this); 9464 BasicType bt = Matcher::vector_element_basic_type(this); 9465 int opc = this->ideal_Opcode(); 9466 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9467 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9468 %} 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9473 match(Set dst (SubVB (Binary dst src2) mask)); 9474 match(Set dst (SubVS (Binary dst src2) mask)); 9475 match(Set dst (SubVI (Binary dst src2) mask)); 9476 match(Set dst (SubVL (Binary dst src2) mask)); 9477 match(Set dst (SubVF (Binary dst src2) mask)); 9478 match(Set dst (SubVD (Binary dst src2) mask)); 9479 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Per-lane comparison, predicated on the incoming mask
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// XorVMask with MaskAll(-1) is a mask negation. For masks shorter than
// 8 bits the complement must be trimmed back to mask_len bits, which needs
// a scratch k-register and GPR.
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt\t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt\t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
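
// On targets where the mask type is a true predicate (EVEX), converting a
// long to a mask is a single GPR-to-k-register move; the kmov() helper picks
// a width suitable for the mask, roughly (illustrative only):
//   kmovq k1, rax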
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "mask types must match");
    uint masklen = Matcher::vector_length(this);
    // Without AVX512DQ there are no byte-granular k-register logic
    // instructions, so widen short masks to 16 bits.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
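
// VPTERNLOG interprets $func as an 8-bit truth table over three inputs:
// bit i of the immediate is the result for the input combination
// i = (A<<2)|(B<<1)|C. For example, func == 0x96 computes the three-way XOR
// A ^ B ^ C. The memory variant below only changes where the third operand
// comes from.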
instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// IsInfinite tests via VFPCLASS: imm8 0x18 selects the +infinity (bit 3) and
// -infinity (bit 4) classes, so the k-register is non-zero iff the input is
// +/-infinity. The low bit is then copied into a GPR.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}