//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
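//
// For example, the first definition below,
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// reads: XMM0 is Save-On-Call under both save conventions, is spilled as a
// float (Op_RegF), has hardware encoding 0, and is backed by xmm0's VMReg;
// XMM0b-XMM0p name the remaining fifteen 32-bit words of the same 512-bit
// register.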
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());

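// Each AVX-512 opmask register K1-K7 is 64 bits wide; the *_H entries name
// the upper half, so the allocator sees every mask register as a pair
// (see alloc_class chunk2 and the vectmask classes below).
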
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                   ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                   ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

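// The per-register classes above let an operand pin itself to one specific
// opmask register. A minimal ADL sketch of such an operand, following the
// usual operand pattern (an illustration, not a definition from this
// section):
//
//   operand kReg_K1() %{
//     constraint(ALLOC_IN_RC(vectmask_reg_K1));
//     match(RegVectMask);
//     format %{ %}
//     interface(REG_INTER);
//   %}
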
// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                           ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                         ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                            ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                          ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                             ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                           ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

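// Each reg_class_dynamic here chooses between its EVEX and legacy variants
// with the trailing predicate, evaluated once CPU features are known.
// Conceptually (an illustrative sketch with hypothetical mask names, not
// code generated from this file):
//
//   // vectors_reg resolves to one of the two static masks:
//   const RegMask& vectors_reg_mask = VM_Version::supports_evex()
//       ? vectors_reg_evex_mask      // XMM0-XMM31 available
//       : vectors_reg_legacy_mask;   // XMM0-XMM15 only
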
// Class for pre evex 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
                             XMM5, XMM5b,
                             XMM6, XMM6b,
                             XMM7, XMM7b
#ifdef _LP64
                             ,XMM8, XMM8b,
                             XMM9, XMM9b,
                             XMM10, XMM10b,
                             XMM11, XMM11b,
                             XMM12, XMM12b,
                             XMM13, XMM13b,
                             XMM14, XMM14b,
                             XMM15, XMM15b
#endif
                             );

// Class for evex 64bit vector registers
reg_class vectord_reg_evex(XMM0, XMM0b,
                           XMM1, XMM1b,
                           XMM2, XMM2b,
                           XMM3, XMM3b,
                           XMM4, XMM4b,
                           XMM5, XMM5b,
                           XMM6, XMM6b,
                           XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                           XMM9, XMM9b,
                           XMM10, XMM10b,
                           XMM11, XMM11b,
                           XMM12, XMM12b,
                           XMM13, XMM13b,
                           XMM14, XMM14b,
                           XMM15, XMM15b,
                           XMM16, XMM16b,
                           XMM17, XMM17b,
                           XMM18, XMM18b,
                           XMM19, XMM19b,
                           XMM20, XMM20b,
                           XMM21, XMM21b,
                           XMM22, XMM22b,
                           XMM23, XMM23b,
                           XMM24, XMM24b,
                           XMM25, XMM25b,
                           XMM26, XMM26b,
                           XMM27, XMM27b,
                           XMM28, XMM28b,
                           XMM29, XMM29b,
                           XMM30, XMM30b,
                           XMM31, XMM31b
#endif
                           );

reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 128bit vector registers
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
                             XMM1, XMM1b, XMM1c, XMM1d,
                             XMM2, XMM2b, XMM2c, XMM2d,
                             XMM3, XMM3b, XMM3c, XMM3d,
                             XMM4, XMM4b, XMM4c, XMM4d,
                             XMM5, XMM5b, XMM5c, XMM5d,
                             XMM6, XMM6b, XMM6c, XMM6d,
                             XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                             ,XMM8, XMM8b, XMM8c, XMM8d,
                             XMM9, XMM9b, XMM9c, XMM9d,
                             XMM10, XMM10b, XMM10c, XMM10d,
                             XMM11, XMM11b, XMM11c, XMM11d,
                             XMM12, XMM12b, XMM12c, XMM12d,
                             XMM13, XMM13b, XMM13c, XMM13d,
                             XMM14, XMM14b, XMM14c, XMM14d,
                             XMM15, XMM15b, XMM15c, XMM15d
#endif
                             );

// Class for evex 128bit vector registers
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
                           XMM1, XMM1b, XMM1c, XMM1d,
                           XMM2, XMM2b, XMM2c, XMM2d,
                           XMM3, XMM3b, XMM3c, XMM3d,
                           XMM4, XMM4b, XMM4c, XMM4d,
                           XMM5, XMM5b, XMM5c, XMM5d,
                           XMM6, XMM6b, XMM6c, XMM6d,
                           XMM7, XMM7b, XMM7c, XMM7d
#ifdef _LP64
                           ,XMM8, XMM8b, XMM8c, XMM8d,
                           XMM9, XMM9b, XMM9c, XMM9d,
                           XMM10, XMM10b, XMM10c, XMM10d,
                           XMM11, XMM11b, XMM11c, XMM11d,
                           XMM12, XMM12b, XMM12c, XMM12d,
                           XMM13, XMM13b, XMM13c, XMM13d,
                           XMM14, XMM14b, XMM14c, XMM14d,
                           XMM15, XMM15b, XMM15c, XMM15d,
                           XMM16, XMM16b, XMM16c, XMM16d,
                           XMM17, XMM17b, XMM17c, XMM17d,
                           XMM18, XMM18b, XMM18c, XMM18d,
                           XMM19, XMM19b, XMM19c, XMM19d,
                           XMM20, XMM20b, XMM20c, XMM20d,
                           XMM21, XMM21b, XMM21c, XMM21d,
                           XMM22, XMM22b, XMM22c, XMM22d,
                           XMM23, XMM23b, XMM23c, XMM23d,
                           XMM24, XMM24b, XMM24c, XMM24d,
                           XMM25, XMM25b, XMM25c, XMM25d,
                           XMM26, XMM26b, XMM26c, XMM26d,
                           XMM27, XMM27b, XMM27c, XMM27d,
                           XMM28, XMM28b, XMM28c, XMM28d,
                           XMM29, XMM29b, XMM29c, XMM29d,
                           XMM30, XMM30b, XMM30c, XMM30d,
                           XMM31, XMM31b, XMM31c, XMM31d
#endif
                           );

reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for pre evex 256bit vector registers
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                             XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                             XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                             XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                             XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                             XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                             XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                             XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                             ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                             XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                             XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                             XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                             XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                             XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                             XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                             XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
#endif
                             );

// Class for evex 256bit vector registers
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
#ifdef _LP64
                           ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
                           XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
                           XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
                           XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
                           XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
#endif
                           );

reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 512bit vector registers
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                           XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                           XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                           XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                           XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                           XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                           XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                           XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                           ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                           XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                           XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                           XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                           XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                           XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                           XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                           XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                           ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                           XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                           XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                           XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                           XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                           XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                           XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                           XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                           XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                           XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                           XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                           XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                           XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
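//
// For instance, HandlerImpl below is declared in this source_hpp block
// because output.cpp needs size_exception_handler()/size_deopt_handler()
// outside the ad-scope, while the matching emit_* bodies live in the
// source %{ }% block further down in this file.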

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch (bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    _last_flag                = Flag_clears_sign_flag
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;
  } else {
    return 1;
  }
}

int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.
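  // The call below targets the very next instruction, so its only effect is
  // to push its own return address (the address of "next") onto the stack;
  // the subptr then adjusts that stack slot in place by the distance back to
  // the_pc, yielding the_pc without clobbering any register.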

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr(), noreg);
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}

static Assembler::Width widthForType(BasicType bt) {
  if (bt == T_BYTE) {
    return Assembler::B;
  } else if (bt == T_SHORT) {
    return Assembler::W;
  } else if (bt == T_INT) {
    return Assembler::D;
  } else {
    assert(bt == T_LONG, "not a long: %s", type2name(bt));
    return Assembler::Q;
  }
}

//=============================================================================

// Float masks come from different places depending on platform.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask()   { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask()     { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask()     { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set()       { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits()  { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask()  { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask()   { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask()  { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask()    { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask()   { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask()        { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask()        { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip()     { return StubRoutines::x86::vector_float_sign_flip(); }
static address vector_double_signflip()    { return StubRoutines::x86::vector_double_sign_flip(); }

//=============================================================================
bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AddReductionVL:
      if (UseSSE < 2) { // requires at least SSE2
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_IsInfiniteF:
    case Op_IsInfiniteD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
    case Op_VectorUCastB2X:
    case Op_VectorUCastS2X:
    case Op_VectorUCastI2X:
    case Op_VectorMaskCast:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_PopulateIndex:
      if (!is_LP64 || (UseAVX < 2)) {
        return false;
      }
      break;
    case Op_RoundVF:
      if (UseAVX < 2) { // enabled for AVX2 only
        return false;
      }
      break;
    case Op_RoundVD:
      if (UseAVX < 3) {
        return false; // enabled for AVX3 only
      }
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
#endif
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaF:
    case Op_FmaD:
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
      if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
    case Op_VectorMaskToLong:
      if (!is_LP64 || UseAVX < 1) {
        return false;
      }
      break;
    case Op_RoundF:
    case Op_RoundD:
      if (!is_LP64) {
        return false;
      }
      break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3 || !is_LP64) {
        return false;
      }
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
#ifndef _LP64
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) { // requires at least SSE
        return false;
      }
      break;
    case Op_MulAddVS2VI:
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2) {
        return false;
      }
      break;
#endif // !LP64
    case Op_CompressBits:
      if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) {
        return false;
      }
      break;
    case Op_ExpandBits:
      if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) {
        return false;
      }
      break;
    case Op_SignumF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SignumD:
      if (UseSSE < 2) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SqrtF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SqrtD:
#ifdef _LP64
      if (UseSSE < 2) {
        return false;
      }
#else
      // x86_32.ad has a special match rule for SqrtD.
      // Together with common x86 rules, this handles all UseSSE cases.
#endif
      break;
    case Op_ConvF2HF:
    case Op_ConvHF2F:
      if (!VM_Version::supports_float16()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
        return false;
      }
      break;
  }
  return true; // Match rules are supported by default.
}

//------------------------------------------------------------------------

static inline bool is_pop_count_instr_target(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
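// For example, Op_RoundVD already passed the UseAVX > 2 check in
// match_rule_supported() above, but is additionally rejected below when
// AVX512DQ is absent; likewise 512-bit AbsVF/NegVF are rejected without
// AVX512DQ because the 512-bit vandps/vxorps forms are unavailable.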
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!is_LP64 || !VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
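        // Worked example: a 512-bit MaxVF (vlen == 16, size_in_bits == 512)
        // on a CPU with UseAVX > 2 but without AVX512DQ fails the check
        // below, so float min/max is then only generated at 256 bits or less.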
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
#ifndef _LP64
    case Op_VectorInsert:
      if (bt == T_LONG || bt == T_DOUBLE) {
        return false;
      }
      break;
#endif
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if (vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
      // As per JLS section 5.1.3, narrowing conversion to sub-word types
      // happens after an intermediate conversion to integer, and the special
      // handling code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
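      // Concrete case: casting 8 floats (a 256-bit source) to 8 bytes still
      // needs the 256-bit vpcmpeqd fixup, so it is an AVX2-only path even
      // though the 64-bit byte result would fit in half an XMM register.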
      int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
      if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
    }
    // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
          ((!VM_Version::supports_evex() ||
            ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (size_in_bits == 64) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) {
        return false;
      }
      if (size_in_bits < 128) {
        return false;
      }
    case Op_VectorLongToMask:
      if (UseAVX < 1 || !is_LP64) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
      if (!is_pop_count_instr_target(bt) &&
          (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
        return false;
      }
    }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true; // By default, match rules are supported.
}

bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode. Most of the unary/binary/ternary masked
  // operations share the IR nodes of their non-masked counterpart, with the
  // mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation
  // patterns by returning a default false value for all the opcodes apart
  // from the ones whose masked instruction patterns are defined in this file.
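  // Example of the stricter contract: non-masked AddVB has no special
  // requirements beyond baseline vector support, but its masked form below
  // additionally requires AVX512BW (plus AVX512VL for sub-512-bit vectors),
  // since masked operations only exist as EVEX encodings.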
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch (opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512dq()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      return true;

    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (is_floating_point_type(bt)) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
    default:
      return false;
  }
}

bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
  assert(Matcher::is_generic_vector(generic_opnd), "not generic");
  bool legacy = (generic_opnd->opcode() == LEGVEC);
  if (!VM_Version::supports_avx512vlbwdq() && // KNL
      is_temp && !legacy && (ideal_reg == Op_VecZ)) {
    // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
    return new legVecZOper();
  }
  if (legacy) {
    switch (ideal_reg) {
      case Op_VecS: return new legVecSOper();
      case Op_VecD: return new legVecDOper();
      case Op_VecX: return new legVecXOper();
      case Op_VecY: return new legVecYOper();
      case Op_VecZ: return new legVecZOper();
    }
  } else {
    switch (ideal_reg) {
      case Op_VecS: return new vecSOper();
      case Op_VecD: return new vecDOper();
      case Op_VecX: return new vecXOper();
      case Op_VecY: return new vecYOper();
      case Op_VecZ: return new vecZOper();
    }
  }
  ShouldNotReachHere();
  return nullptr;
}

bool Matcher::is_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
    case MoveF2VL_rule:
    case MoveF2LEG_rule:
    case MoveVL2F_rule:
    case MoveLEG2F_rule:
    case MoveD2VL_rule:
    case MoveD2LEG_rule:
    case MoveVL2D_rule:
    case MoveLEG2D_rule:
      return true;
    default:
      return false;
  }
}

bool Matcher::is_generic_vector(MachOper* opnd) {
  switch (opnd->opcode()) {
    case VEC:
    case LEGVEC:
      return true;
    default:
      return false;
  }
}

//------------------------------------------------------------------------

const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}

const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return new TypeVectMask(elemTy, length);
}

// Max vector size in bytes. 0 if not supported.
int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX2/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size, (int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
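  // Worked example: UseAVX=2 yields size = (1 << 2) * 8 = 32 bytes for T_INT;
  // UseAVX=3 with AVX512BW extends T_BYTE to 64 bytes. The result is then
  // clipped to MaxVectorSize above and checked against the per-type minimum
  // in the switch below.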
  switch (bt) {
    case T_DOUBLE:
    case T_LONG:
      if (size < 16) return 0;
      break;
    case T_FLOAT:
    case T_INT:
      if (size < 8) return 0;
      break;
    case T_BOOLEAN:
      if (size < 4) return 0;
      break;
    case T_CHAR:
      if (size < 4) return 0;
      break;
    case T_BYTE:
      if (size < 4) return 0;
      break;
    case T_SHORT:
      if (size < 4) return 0;
      break;
    default:
      ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}

int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling svml double64 vectors
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size, max_size);
}

int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
  // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake
  if (VM_Version::is_default_intel_cascade_lake()) {
    return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
  }
  return Matcher::max_vector_size(bt);
}

int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch (size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow the Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
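//
// Illustration of the shapes recognized below (see is_bmi_pattern()):
//   x & -x      == (AndI (SubI 0 LoadI) LoadI)  -> blsi   (extract lowest set bit)
//   x & (x - 1) == (AndI (AddI LoadI -1) LoadI) -> blsr   (reset lowest set bit)
//   x ^ (x - 1) == (XorI (AddI LoadI -1) LoadI) -> blsmsk (mask up to lowest set bit)
// and the corresponding AndL/SubL/AddL/XorL forms for long.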
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;
  Node* _mop_node;
  int _con_op;

  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    if (_mop_node->outcnt() > 2) {
      return false;
    }
    op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
    if (op1_op2_idx == -1) {
      return false;
    }
    // Memory operation must be the other edge
    int op1_mop_idx = (op1_op2_idx & 1) + 1;

    // Check that the mop node is really what we want
    if (_op1_node->in(op1_mop_idx) == _mop_node) {
      Node* op2_node = _op1_node->in(op1_op2_idx);
      if (op2_node->outcnt() > 1) {
        return false;
      }
      assert(op2_node->Opcode() == op2, "Should be");
      op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
      if (op2_con_idx == -1) {
        return false;
      }
      // Memory operation must be the other edge
      int op2_mop_idx = (op2_con_idx & 1) + 1;
      // Check that the memory operation is the same node
      if (op2_node->in(op2_mop_idx) == _mop_node) {
        // Now check the constant
        const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
        if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
          return true;
        }
      }
    }
    return false;
  }
};

static bool is_bmi_pattern(Node* n, Node* m) {
  assert(UseBMI1Instructions, "sanity");
  if (n != nullptr && m != nullptr) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}

// Should the matcher clone input 'm' of node 'n'?
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
  // If 'n' and 'm' are part of a graph for a BMI instruction, clone the input 'm'.
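  // Cloning the load means both users in the fused shape (the logic op and
  // the sub/add feeding it) can consume the same memory operand directly,
  // letting the whole pattern match a single BMI instruction with a memory
  // source instead of keeping the loaded value alive in a register.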
  if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
    mstack.push(m, Visit);
    return true;
  }
  if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con)
    mstack.push(m, Visit); // m = ShiftCntV
    return true;
  }
  return false;
}

// Should the Matcher clone shifts on addressing modes, expecting them
// to be subsumed into complex addressing expressions or compute them
// into registers?
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
  Node *off = m->in(AddPNode::Offset);
  if (off->is_Con()) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    Node *adr = m->in(AddPNode::Address);

    // Intel can handle 2 adds in addressing mode
    // AtomicAdd is not an addressing expression.
    // Cheap to find it by looking for screwy base.
    if (adr->is_AddP() &&
        !adr->in(AddPNode::Base)->is_top() &&
        LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
        // Are there other uses besides address expressions?
        !is_visited(adr)) {
      address_visited.set(adr->_idx); // Flag as address_visited
      Node *shift = adr->in(AddPNode::Offset);
      if (!clone_shift(shift, this, mstack, address_visited)) {
        mstack.push(shift, Pre_Visit);
      }
      mstack.push(adr->in(AddPNode::Address), Pre_Visit);
      mstack.push(adr->in(AddPNode::Base), Pre_Visit);
    } else {
      mstack.push(adr, Pre_Visit);
    }

    // Clone X+offset as it also folds into most addressing expressions
    mstack.push(off, Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  } else if (clone_shift(off, this, mstack, address_visited)) {
    address_visited.test_set(m->_idx); // Flag as address_visited
    mstack.push(m->in(AddPNode::Address), Pre_Visit);
    mstack.push(m->in(AddPNode::Base), Pre_Visit);
    return true;
  }
  return false;
}

static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) {
  switch (bt) {
    case BoolTest::eq:
      return Assembler::eq;
    case BoolTest::ne:
      return Assembler::neq;
    case BoolTest::le:
    case BoolTest::ule:
      return Assembler::le;
    case BoolTest::ge:
    case BoolTest::uge:
      return Assembler::nlt;
    case BoolTest::lt:
    case BoolTest::ult:
      return Assembler::lt;
    case BoolTest::gt:
    case BoolTest::ugt:
      return Assembler::nle;
    default: ShouldNotReachHere(); return Assembler::_false;
  }
}

static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) {
  switch (bt) {
    case BoolTest::eq: return Assembler::EQ_OQ;  // ordered non-signaling
    // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare.
    case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling
    case BoolTest::le: return Assembler::LE_OQ;  // ordered non-signaling
    case BoolTest::ge: return Assembler::GE_OQ;  // ordered non-signaling
    case BoolTest::lt: return Assembler::LT_OQ;  // ordered non-signaling
    case BoolTest::gt: return Assembler::GT_OQ;  // ordered non-signaling
    default: ShouldNotReachHere(); return Assembler::FALSE_OS;
  }
}

// Helper methods for MachSpillCopyNode::implementation().
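// Note on the instruction selection below: the plain (v)movdqu forms are
// used whenever UseAVX < 3 or AVX512VL is available. On AVX-512 CPUs without
// VL (e.g. Knights Landing) the VEX-encoded moves cannot reach XMM16-XMM31,
// so vextractf32x4/vextractf64x4 with index 0 serve as equivalent full-width
// copies via an EVEX encoding that is available with plain AVX512F.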
static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
  assert(ireg == Op_VecS || // 32bit vector
         ((src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
          (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi),
         "no non-adjacent vector moves");
  if (cbuf) {
    C2_MacroAssembler _masm(cbuf);
    switch (ireg) {
      case Op_VecS: // copy whole register
      case Op_VecD:
      case Op_VecX:
#ifndef _LP64
        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        } else {
          __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
        }
#endif
        break;
      case Op_VecY:
#ifndef _LP64
        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
#else
        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
        } else {
          __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
        }
#endif
        break;
      case Op_VecZ:
        __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
        break;
      default:
        ShouldNotReachHere();
    }
#ifndef PRODUCT
  } else {
    switch (ireg) {
      case Op_VecS:
      case Op_VecD:
      case Op_VecX:
        st->print("movdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      case Op_VecY:
      case Op_VecZ:
        st->print("vmovdqu %s,%s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
        break;
      default:
        ShouldNotReachHere();
    }
#endif
  }
}

void vec_spill_helper(CodeBuffer *cbuf, bool is_load,
                      int stack_offset, int reg, uint ireg, outputStream* st) {
  if (cbuf) {
    C2_MacroAssembler _masm(cbuf);
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecD:
          __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          break;
        case Op_VecX:
#ifndef _LP64
          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          } else {
            __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
            __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
          }
#endif
          break;
        case Op_VecY:
#ifndef _LP64
          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
          } else {
            __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
            __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 0x0);
          }
#endif
          break;
        case Op_VecZ:
          __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecD:
          __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          break;
        case Op_VecX:
#ifndef _LP64
          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          } else {
            __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
          }
#endif
          break;
        case Op_VecY:
#ifndef _LP64
          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
#else
          if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
            __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
          } else {
            __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
          }
#endif
          break;
        case Op_VecZ:
          __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
          break;
        default:
          ShouldNotReachHere();
      }
    }
#ifndef PRODUCT
  } else {
    if (is_load) {
      switch (ireg) {
        case Op_VecS:
          st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecD:
          st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecX:
          st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
          break;
        default:
          ShouldNotReachHere();
      }
    } else { // store
      switch (ireg) {
        case Op_VecS:
          st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecD:
          st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecX:
          st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        case Op_VecY:
        case Op_VecZ:
          st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
          break;
        default:
          ShouldNotReachHere();
      }
    }
#endif
  }
}

template <class T>
static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) {
  GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len);
  jvalue ele;
  switch (bt) {
    case T_BYTE:   ele.b = con; break;
    case T_SHORT:  ele.s = con; break;
    case T_INT:    ele.i = con; break;
    case T_LONG:   ele.j = con; break;
    case T_FLOAT:  ele.f = con; break;
    case T_DOUBLE: ele.d = con; break;
    default: ShouldNotReachHere();
  }
  for (int i = 0; i < len; i++) {
    val->append(ele);
  }
  return val;
}

static inline jlong high_bit_set(BasicType bt) {
  switch (bt) {
    case T_BYTE:
      return 0x8080808080808080;
    case T_SHORT:
      return 0x8000800080008000;
    case T_INT:
      return 0x8000000080000000;
    case T_LONG:
      return 0x8000000000000000;
    default:
      ShouldNotReachHere();
      return 0;
  }
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
  C2_MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
  C2_MacroAssembler _masm(&cbuf);
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  enc_class call_epilog %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
      C2_MacroAssembler _masm(&cbuf);
      // The last return value is not set by the callee but used to pass IsInit information to compiled code.
      // Search for the corresponding projection, get the register and emit code that initializes it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}

%}

// Operands for bound floating pointer register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format %{ %}
  interface(REG_INTER);
%}

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
2942 operand vecZ() %{ 2943 constraint(ALLOC_IN_RC(vectorz_reg)); 2944 match(VecZ); 2945 2946 format %{ %} 2947 interface(REG_INTER); 2948 %} 2949 2950 // Replaces legVec during post-selection cleanup. See above. 2951 operand legVecZ() %{ 2952 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2953 match(VecZ); 2954 2955 format %{ %} 2956 interface(REG_INTER); 2957 %} 2958 2959 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2960 2961 // ============================================================================ 2962 2963 instruct ShouldNotReachHere() %{ 2964 match(Halt); 2965 format %{ "stop\t# ShouldNotReachHere" %} 2966 ins_encode %{ 2967 if (is_reachable()) { 2968 __ stop(_halt_reason); 2969 } 2970 %} 2971 ins_pipe(pipe_slow); 2972 %} 2973 2974 // ============================================================================ 2975 2976 instruct addF_reg(regF dst, regF src) %{ 2977 predicate((UseSSE>=1) && (UseAVX == 0)); 2978 match(Set dst (AddF dst src)); 2979 2980 format %{ "addss $dst, $src" %} 2981 ins_cost(150); 2982 ins_encode %{ 2983 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2984 %} 2985 ins_pipe(pipe_slow); 2986 %} 2987 2988 instruct addF_mem(regF dst, memory src) %{ 2989 predicate((UseSSE>=1) && (UseAVX == 0)); 2990 match(Set dst (AddF dst (LoadF src))); 2991 2992 format %{ "addss $dst, $src" %} 2993 ins_cost(150); 2994 ins_encode %{ 2995 __ addss($dst$$XMMRegister, $src$$Address); 2996 %} 2997 ins_pipe(pipe_slow); 2998 %} 2999 3000 instruct addF_imm(regF dst, immF con) %{ 3001 predicate((UseSSE>=1) && (UseAVX == 0)); 3002 match(Set dst (AddF dst con)); 3003 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3004 ins_cost(150); 3005 ins_encode %{ 3006 __ addss($dst$$XMMRegister, $constantaddress($con)); 3007 %} 3008 ins_pipe(pipe_slow); 3009 %} 3010 3011 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3012 predicate(UseAVX > 0); 3013 match(Set dst (AddF src1 src2)); 3014 3015 format %{ "vaddss $dst, $src1, $src2" %} 3016 ins_cost(150); 3017 ins_encode %{ 3018 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3019 %} 3020 ins_pipe(pipe_slow); 3021 %} 3022 3023 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3024 predicate(UseAVX > 0); 3025 match(Set dst (AddF src1 (LoadF src2))); 3026 3027 format %{ "vaddss $dst, $src1, $src2" %} 3028 ins_cost(150); 3029 ins_encode %{ 3030 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3031 %} 3032 ins_pipe(pipe_slow); 3033 %} 3034 3035 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3036 predicate(UseAVX > 0); 3037 match(Set dst (AddF src con)); 3038 3039 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3040 ins_cost(150); 3041 ins_encode %{ 3042 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3043 %} 3044 ins_pipe(pipe_slow); 3045 %} 3046 3047 instruct addD_reg(regD dst, regD src) %{ 3048 predicate((UseSSE>=2) && (UseAVX == 0)); 3049 match(Set dst (AddD dst src)); 3050 3051 format %{ "addsd $dst, $src" %} 3052 ins_cost(150); 3053 ins_encode %{ 3054 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3055 %} 3056 ins_pipe(pipe_slow); 3057 %} 3058 3059 instruct addD_mem(regD dst, memory src) %{ 3060 predicate((UseSSE>=2) && (UseAVX == 0)); 3061 match(Set dst (AddD dst (LoadD src))); 3062 3063 format %{ "addsd $dst, $src" %} 3064 ins_cost(150); 3065 ins_encode %{ 3066 __ addsd($dst$$XMMRegister, $src$$Address); 3067 %} 3068 ins_pipe(pipe_slow); 3069 %} 
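
// A note on the rule shapes in this section: the SSE forms are destructive
// two-operand instructions, so those rules tie one input to the destination
// ("Set dst (AddF dst src)"), while the AVX forms use the non-destructive
// three-operand VEX encoding, so those rules can take two independent inputs.
// Illustrative encodings (register names are arbitrary examples, not
// register-allocator output):
//   SSE:  addss  xmm0, xmm1        // xmm0 = xmm0 + xmm1
//   AVX:  vaddss xmm0, xmm1, xmm2  // xmm0 = xmm1 + xmm2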
3070 3071 instruct addD_imm(regD dst, immD con) %{ 3072 predicate((UseSSE>=2) && (UseAVX == 0)); 3073 match(Set dst (AddD dst con)); 3074 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3075 ins_cost(150); 3076 ins_encode %{ 3077 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3078 %} 3079 ins_pipe(pipe_slow); 3080 %} 3081 3082 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3083 predicate(UseAVX > 0); 3084 match(Set dst (AddD src1 src2)); 3085 3086 format %{ "vaddsd $dst, $src1, $src2" %} 3087 ins_cost(150); 3088 ins_encode %{ 3089 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3090 %} 3091 ins_pipe(pipe_slow); 3092 %} 3093 3094 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3095 predicate(UseAVX > 0); 3096 match(Set dst (AddD src1 (LoadD src2))); 3097 3098 format %{ "vaddsd $dst, $src1, $src2" %} 3099 ins_cost(150); 3100 ins_encode %{ 3101 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3102 %} 3103 ins_pipe(pipe_slow); 3104 %} 3105 3106 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3107 predicate(UseAVX > 0); 3108 match(Set dst (AddD src con)); 3109 3110 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3111 ins_cost(150); 3112 ins_encode %{ 3113 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3114 %} 3115 ins_pipe(pipe_slow); 3116 %} 3117 3118 instruct subF_reg(regF dst, regF src) %{ 3119 predicate((UseSSE>=1) && (UseAVX == 0)); 3120 match(Set dst (SubF dst src)); 3121 3122 format %{ "subss $dst, $src" %} 3123 ins_cost(150); 3124 ins_encode %{ 3125 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3126 %} 3127 ins_pipe(pipe_slow); 3128 %} 3129 3130 instruct subF_mem(regF dst, memory src) %{ 3131 predicate((UseSSE>=1) && (UseAVX == 0)); 3132 match(Set dst (SubF dst (LoadF src))); 3133 3134 format %{ "subss $dst, $src" %} 3135 ins_cost(150); 3136 ins_encode %{ 3137 __ subss($dst$$XMMRegister, $src$$Address); 3138 %} 3139 ins_pipe(pipe_slow); 3140 %} 3141 3142 instruct subF_imm(regF dst, immF con) %{ 3143 predicate((UseSSE>=1) && (UseAVX == 0)); 3144 match(Set dst (SubF dst con)); 3145 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3146 ins_cost(150); 3147 ins_encode %{ 3148 __ subss($dst$$XMMRegister, $constantaddress($con)); 3149 %} 3150 ins_pipe(pipe_slow); 3151 %} 3152 3153 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3154 predicate(UseAVX > 0); 3155 match(Set dst (SubF src1 src2)); 3156 3157 format %{ "vsubss $dst, $src1, $src2" %} 3158 ins_cost(150); 3159 ins_encode %{ 3160 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3161 %} 3162 ins_pipe(pipe_slow); 3163 %} 3164 3165 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3166 predicate(UseAVX > 0); 3167 match(Set dst (SubF src1 (LoadF src2))); 3168 3169 format %{ "vsubss $dst, $src1, $src2" %} 3170 ins_cost(150); 3171 ins_encode %{ 3172 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3173 %} 3174 ins_pipe(pipe_slow); 3175 %} 3176 3177 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3178 predicate(UseAVX > 0); 3179 match(Set dst (SubF src con)); 3180 3181 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3182 ins_cost(150); 3183 ins_encode %{ 3184 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3185 %} 3186 ins_pipe(pipe_slow); 3187 %} 3188 3189 instruct subD_reg(regD dst, regD src) 
%{ 3190 predicate((UseSSE>=2) && (UseAVX == 0)); 3191 match(Set dst (SubD dst src)); 3192 3193 format %{ "subsd $dst, $src" %} 3194 ins_cost(150); 3195 ins_encode %{ 3196 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3197 %} 3198 ins_pipe(pipe_slow); 3199 %} 3200 3201 instruct subD_mem(regD dst, memory src) %{ 3202 predicate((UseSSE>=2) && (UseAVX == 0)); 3203 match(Set dst (SubD dst (LoadD src))); 3204 3205 format %{ "subsd $dst, $src" %} 3206 ins_cost(150); 3207 ins_encode %{ 3208 __ subsd($dst$$XMMRegister, $src$$Address); 3209 %} 3210 ins_pipe(pipe_slow); 3211 %} 3212 3213 instruct subD_imm(regD dst, immD con) %{ 3214 predicate((UseSSE>=2) && (UseAVX == 0)); 3215 match(Set dst (SubD dst con)); 3216 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3217 ins_cost(150); 3218 ins_encode %{ 3219 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3220 %} 3221 ins_pipe(pipe_slow); 3222 %} 3223 3224 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3225 predicate(UseAVX > 0); 3226 match(Set dst (SubD src1 src2)); 3227 3228 format %{ "vsubsd $dst, $src1, $src2" %} 3229 ins_cost(150); 3230 ins_encode %{ 3231 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3232 %} 3233 ins_pipe(pipe_slow); 3234 %} 3235 3236 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3237 predicate(UseAVX > 0); 3238 match(Set dst (SubD src1 (LoadD src2))); 3239 3240 format %{ "vsubsd $dst, $src1, $src2" %} 3241 ins_cost(150); 3242 ins_encode %{ 3243 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3244 %} 3245 ins_pipe(pipe_slow); 3246 %} 3247 3248 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3249 predicate(UseAVX > 0); 3250 match(Set dst (SubD src con)); 3251 3252 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3253 ins_cost(150); 3254 ins_encode %{ 3255 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3256 %} 3257 ins_pipe(pipe_slow); 3258 %} 3259 3260 instruct mulF_reg(regF dst, regF src) %{ 3261 predicate((UseSSE>=1) && (UseAVX == 0)); 3262 match(Set dst (MulF dst src)); 3263 3264 format %{ "mulss $dst, $src" %} 3265 ins_cost(150); 3266 ins_encode %{ 3267 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3268 %} 3269 ins_pipe(pipe_slow); 3270 %} 3271 3272 instruct mulF_mem(regF dst, memory src) %{ 3273 predicate((UseSSE>=1) && (UseAVX == 0)); 3274 match(Set dst (MulF dst (LoadF src))); 3275 3276 format %{ "mulss $dst, $src" %} 3277 ins_cost(150); 3278 ins_encode %{ 3279 __ mulss($dst$$XMMRegister, $src$$Address); 3280 %} 3281 ins_pipe(pipe_slow); 3282 %} 3283 3284 instruct mulF_imm(regF dst, immF con) %{ 3285 predicate((UseSSE>=1) && (UseAVX == 0)); 3286 match(Set dst (MulF dst con)); 3287 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3288 ins_cost(150); 3289 ins_encode %{ 3290 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3291 %} 3292 ins_pipe(pipe_slow); 3293 %} 3294 3295 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3296 predicate(UseAVX > 0); 3297 match(Set dst (MulF src1 src2)); 3298 3299 format %{ "vmulss $dst, $src1, $src2" %} 3300 ins_cost(150); 3301 ins_encode %{ 3302 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3303 %} 3304 ins_pipe(pipe_slow); 3305 %} 3306 3307 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3308 predicate(UseAVX > 0); 3309 match(Set dst (MulF src1 (LoadF src2))); 3310 3311 format %{ "vmulss $dst, $src1, $src2" %} 3312 
ins_cost(150); 3313 ins_encode %{ 3314 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3315 %} 3316 ins_pipe(pipe_slow); 3317 %} 3318 3319 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3320 predicate(UseAVX > 0); 3321 match(Set dst (MulF src con)); 3322 3323 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3324 ins_cost(150); 3325 ins_encode %{ 3326 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3327 %} 3328 ins_pipe(pipe_slow); 3329 %} 3330 3331 instruct mulD_reg(regD dst, regD src) %{ 3332 predicate((UseSSE>=2) && (UseAVX == 0)); 3333 match(Set dst (MulD dst src)); 3334 3335 format %{ "mulsd $dst, $src" %} 3336 ins_cost(150); 3337 ins_encode %{ 3338 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3339 %} 3340 ins_pipe(pipe_slow); 3341 %} 3342 3343 instruct mulD_mem(regD dst, memory src) %{ 3344 predicate((UseSSE>=2) && (UseAVX == 0)); 3345 match(Set dst (MulD dst (LoadD src))); 3346 3347 format %{ "mulsd $dst, $src" %} 3348 ins_cost(150); 3349 ins_encode %{ 3350 __ mulsd($dst$$XMMRegister, $src$$Address); 3351 %} 3352 ins_pipe(pipe_slow); 3353 %} 3354 3355 instruct mulD_imm(regD dst, immD con) %{ 3356 predicate((UseSSE>=2) && (UseAVX == 0)); 3357 match(Set dst (MulD dst con)); 3358 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3359 ins_cost(150); 3360 ins_encode %{ 3361 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3362 %} 3363 ins_pipe(pipe_slow); 3364 %} 3365 3366 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3367 predicate(UseAVX > 0); 3368 match(Set dst (MulD src1 src2)); 3369 3370 format %{ "vmulsd $dst, $src1, $src2" %} 3371 ins_cost(150); 3372 ins_encode %{ 3373 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3374 %} 3375 ins_pipe(pipe_slow); 3376 %} 3377 3378 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3379 predicate(UseAVX > 0); 3380 match(Set dst (MulD src1 (LoadD src2))); 3381 3382 format %{ "vmulsd $dst, $src1, $src2" %} 3383 ins_cost(150); 3384 ins_encode %{ 3385 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3386 %} 3387 ins_pipe(pipe_slow); 3388 %} 3389 3390 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3391 predicate(UseAVX > 0); 3392 match(Set dst (MulD src con)); 3393 3394 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3395 ins_cost(150); 3396 ins_encode %{ 3397 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3398 %} 3399 ins_pipe(pipe_slow); 3400 %} 3401 3402 instruct divF_reg(regF dst, regF src) %{ 3403 predicate((UseSSE>=1) && (UseAVX == 0)); 3404 match(Set dst (DivF dst src)); 3405 3406 format %{ "divss $dst, $src" %} 3407 ins_cost(150); 3408 ins_encode %{ 3409 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3410 %} 3411 ins_pipe(pipe_slow); 3412 %} 3413 3414 instruct divF_mem(regF dst, memory src) %{ 3415 predicate((UseSSE>=1) && (UseAVX == 0)); 3416 match(Set dst (DivF dst (LoadF src))); 3417 3418 format %{ "divss $dst, $src" %} 3419 ins_cost(150); 3420 ins_encode %{ 3421 __ divss($dst$$XMMRegister, $src$$Address); 3422 %} 3423 ins_pipe(pipe_slow); 3424 %} 3425 3426 instruct divF_imm(regF dst, immF con) %{ 3427 predicate((UseSSE>=1) && (UseAVX == 0)); 3428 match(Set dst (DivF dst con)); 3429 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3430 ins_cost(150); 3431 ins_encode %{ 3432 __ divss($dst$$XMMRegister, $constantaddress($con)); 3433 
%} 3434 ins_pipe(pipe_slow); 3435 %} 3436 3437 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3438 predicate(UseAVX > 0); 3439 match(Set dst (DivF src1 src2)); 3440 3441 format %{ "vdivss $dst, $src1, $src2" %} 3442 ins_cost(150); 3443 ins_encode %{ 3444 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3445 %} 3446 ins_pipe(pipe_slow); 3447 %} 3448 3449 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3450 predicate(UseAVX > 0); 3451 match(Set dst (DivF src1 (LoadF src2))); 3452 3453 format %{ "vdivss $dst, $src1, $src2" %} 3454 ins_cost(150); 3455 ins_encode %{ 3456 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3457 %} 3458 ins_pipe(pipe_slow); 3459 %} 3460 3461 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3462 predicate(UseAVX > 0); 3463 match(Set dst (DivF src con)); 3464 3465 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3466 ins_cost(150); 3467 ins_encode %{ 3468 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3469 %} 3470 ins_pipe(pipe_slow); 3471 %} 3472 3473 instruct divD_reg(regD dst, regD src) %{ 3474 predicate((UseSSE>=2) && (UseAVX == 0)); 3475 match(Set dst (DivD dst src)); 3476 3477 format %{ "divsd $dst, $src" %} 3478 ins_cost(150); 3479 ins_encode %{ 3480 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3481 %} 3482 ins_pipe(pipe_slow); 3483 %} 3484 3485 instruct divD_mem(regD dst, memory src) %{ 3486 predicate((UseSSE>=2) && (UseAVX == 0)); 3487 match(Set dst (DivD dst (LoadD src))); 3488 3489 format %{ "divsd $dst, $src" %} 3490 ins_cost(150); 3491 ins_encode %{ 3492 __ divsd($dst$$XMMRegister, $src$$Address); 3493 %} 3494 ins_pipe(pipe_slow); 3495 %} 3496 3497 instruct divD_imm(regD dst, immD con) %{ 3498 predicate((UseSSE>=2) && (UseAVX == 0)); 3499 match(Set dst (DivD dst con)); 3500 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3501 ins_cost(150); 3502 ins_encode %{ 3503 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3504 %} 3505 ins_pipe(pipe_slow); 3506 %} 3507 3508 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3509 predicate(UseAVX > 0); 3510 match(Set dst (DivD src1 src2)); 3511 3512 format %{ "vdivsd $dst, $src1, $src2" %} 3513 ins_cost(150); 3514 ins_encode %{ 3515 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3516 %} 3517 ins_pipe(pipe_slow); 3518 %} 3519 3520 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3521 predicate(UseAVX > 0); 3522 match(Set dst (DivD src1 (LoadD src2))); 3523 3524 format %{ "vdivsd $dst, $src1, $src2" %} 3525 ins_cost(150); 3526 ins_encode %{ 3527 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3528 %} 3529 ins_pipe(pipe_slow); 3530 %} 3531 3532 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3533 predicate(UseAVX > 0); 3534 match(Set dst (DivD src con)); 3535 3536 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3537 ins_cost(150); 3538 ins_encode %{ 3539 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3540 %} 3541 ins_pipe(pipe_slow); 3542 %} 3543 3544 instruct absF_reg(regF dst) %{ 3545 predicate((UseSSE>=1) && (UseAVX == 0)); 3546 match(Set dst (AbsF dst)); 3547 ins_cost(150); 3548 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3549 ins_encode %{ 3550 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3551 %} 3552 ins_pipe(pipe_slow); 3553 %} 3554 3555 instruct 
absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtss instruction needs its destination register to be pre-initialized for best performance.
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
// Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- VectorReinterpret ------------------------------------
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!"
%} 3890 ins_encode %{ 3891 switch (Matcher::vector_length_in_bytes(this)) { 3892 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3893 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3894 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3895 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3896 default: ShouldNotReachHere(); 3897 } 3898 %} 3899 ins_pipe( pipe_slow ); 3900 %} 3901 3902 // ---------------------------------------------------------------------------------------------------- 3903 3904 #ifdef _LP64 3905 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3906 match(Set dst (RoundDoubleMode src rmode)); 3907 format %{ "roundsd $dst,$src" %} 3908 ins_cost(150); 3909 ins_encode %{ 3910 assert(UseSSE >= 4, "required"); 3911 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3912 %} 3913 ins_pipe(pipe_slow); 3914 %} 3915 3916 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3917 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3918 format %{ "roundsd $dst,$src" %} 3919 ins_cost(150); 3920 ins_encode %{ 3921 assert(UseSSE >= 4, "required"); 3922 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3923 %} 3924 ins_pipe(pipe_slow); 3925 %} 3926 3927 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3928 match(Set dst (RoundDoubleMode con rmode)); 3929 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3930 ins_cost(150); 3931 ins_encode %{ 3932 assert(UseSSE >= 4, "required"); 3933 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3934 %} 3935 ins_pipe(pipe_slow); 3936 %} 3937 3938 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3939 predicate(Matcher::vector_length(n) < 8); 3940 match(Set dst (RoundDoubleModeV src rmode)); 3941 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3942 ins_encode %{ 3943 assert(UseAVX > 0, "required"); 3944 int vlen_enc = vector_length_encoding(this); 3945 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3951 predicate(Matcher::vector_length(n) == 8); 3952 match(Set dst (RoundDoubleModeV src rmode)); 3953 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3954 ins_encode %{ 3955 assert(UseAVX > 2, "required"); 3956 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3957 %} 3958 ins_pipe( pipe_slow ); 3959 %} 3960 3961 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3962 predicate(Matcher::vector_length(n) < 8); 3963 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3964 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3965 ins_encode %{ 3966 assert(UseAVX > 0, "required"); 3967 int vlen_enc = vector_length_encoding(this); 3968 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3969 %} 3970 ins_pipe( pipe_slow ); 3971 %} 3972 3973 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3974 predicate(Matcher::vector_length(n) == 8); 3975 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3976 format %{ "vrndscalepd $dst,$mem,$rmode\t! 
round packed8D" %} 3977 ins_encode %{ 3978 assert(UseAVX > 2, "required"); 3979 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3980 %} 3981 ins_pipe( pipe_slow ); 3982 %} 3983 #endif // _LP64 3984 3985 instruct onspinwait() %{ 3986 match(OnSpinWait); 3987 ins_cost(200); 3988 3989 format %{ 3990 $$template 3991 $$emit$$"pause\t! membar_onspinwait" 3992 %} 3993 ins_encode %{ 3994 __ pause(); 3995 %} 3996 ins_pipe(pipe_slow); 3997 %} 3998 3999 // a * b + c 4000 instruct fmaD_reg(regD a, regD b, regD c) %{ 4001 match(Set c (FmaD c (Binary a b))); 4002 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4003 ins_cost(150); 4004 ins_encode %{ 4005 assert(UseFMA, "Needs FMA instructions support."); 4006 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4007 %} 4008 ins_pipe( pipe_slow ); 4009 %} 4010 4011 // a * b + c 4012 instruct fmaF_reg(regF a, regF b, regF c) %{ 4013 match(Set c (FmaF c (Binary a b))); 4014 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4015 ins_cost(150); 4016 ins_encode %{ 4017 assert(UseFMA, "Needs FMA instructions support."); 4018 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4019 %} 4020 ins_pipe( pipe_slow ); 4021 %} 4022 4023 // ====================VECTOR INSTRUCTIONS===================================== 4024 4025 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4026 instruct MoveVec2Leg(legVec dst, vec src) %{ 4027 match(Set dst src); 4028 format %{ "" %} 4029 ins_encode %{ 4030 ShouldNotReachHere(); 4031 %} 4032 ins_pipe( fpu_reg_reg ); 4033 %} 4034 4035 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4036 match(Set dst src); 4037 format %{ "" %} 4038 ins_encode %{ 4039 ShouldNotReachHere(); 4040 %} 4041 ins_pipe( fpu_reg_reg ); 4042 %} 4043 4044 // ============================================================================ 4045 4046 // Load vectors generic operand pattern 4047 instruct loadV(vec dst, memory mem) %{ 4048 match(Set dst (LoadVector mem)); 4049 ins_cost(125); 4050 format %{ "load_vector $dst,$mem" %} 4051 ins_encode %{ 4052 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4053 %} 4054 ins_pipe( pipe_slow ); 4055 %} 4056 4057 // Store vectors generic operand pattern. 4058 instruct storeV(memory mem, vec src) %{ 4059 match(Set mem (StoreVector mem src)); 4060 ins_cost(145); 4061 format %{ "store_vector $mem,$src\n\t" %} 4062 ins_encode %{ 4063 switch (Matcher::vector_length_in_bytes(this, $src)) { 4064 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4065 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4066 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4067 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4068 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4069 default: ShouldNotReachHere(); 4070 } 4071 %} 4072 ins_pipe( pipe_slow ); 4073 %} 4074 4075 // ---------------------------------------- Gather ------------------------------------ 4076 4077 // Gather INT, LONG, FLOAT, DOUBLE 4078 4079 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4080 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 4081 match(Set dst (LoadVectorGather mem idx)); 4082 effect(TEMP dst, TEMP tmp, TEMP mask); 4083 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateS=======================================

instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
ins_encode %{ 4221 uint vlen = Matcher::vector_length(this); 4222 int vlen_enc = vector_length_encoding(this); 4223 if (UseAVX >= 2) { 4224 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4225 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4226 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4227 } else { 4228 __ movdl($dst$$XMMRegister, $src$$Register); 4229 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4230 } 4231 } else { 4232 assert(UseAVX < 2, ""); 4233 __ movdl($dst$$XMMRegister, $src$$Register); 4234 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4235 if (vlen >= 8) { 4236 assert(vlen == 8, ""); 4237 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4238 } 4239 } 4240 %} 4241 ins_pipe( pipe_slow ); 4242 %} 4243 4244 instruct ReplS_mem(vec dst, memory mem) %{ 4245 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4246 match(Set dst (Replicate (LoadS mem))); 4247 format %{ "replicateS $dst,$mem" %} 4248 ins_encode %{ 4249 int vlen_enc = vector_length_encoding(this); 4250 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4251 %} 4252 ins_pipe( pipe_slow ); 4253 %} 4254 4255 // ====================ReplicateI======================================= 4256 4257 instruct ReplI_reg(vec dst, rRegI src) %{ 4258 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4259 match(Set dst (Replicate src)); 4260 format %{ "replicateI $dst,$src" %} 4261 ins_encode %{ 4262 uint vlen = Matcher::vector_length(this); 4263 int vlen_enc = vector_length_encoding(this); 4264 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4265 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4266 } else if (VM_Version::supports_avx2()) { 4267 __ movdl($dst$$XMMRegister, $src$$Register); 4268 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4269 } else { 4270 __ movdl($dst$$XMMRegister, $src$$Register); 4271 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4272 } 4273 %} 4274 ins_pipe( pipe_slow ); 4275 %} 4276 4277 instruct ReplI_mem(vec dst, memory mem) %{ 4278 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4279 match(Set dst (Replicate (LoadI mem))); 4280 format %{ "replicateI $dst,$mem" %} 4281 ins_encode %{ 4282 int vlen_enc = vector_length_encoding(this); 4283 if (VM_Version::supports_avx2()) { 4284 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4285 } else if (VM_Version::supports_avx()) { 4286 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4287 } else { 4288 __ movdl($dst$$XMMRegister, $mem$$Address); 4289 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4290 } 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct ReplI_imm(vec dst, immI con) %{ 4296 predicate(Matcher::is_non_long_integral_vector(n)); 4297 match(Set dst (Replicate con)); 4298 format %{ "replicateI $dst,$con" %} 4299 ins_encode %{ 4300 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4301 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4302 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4303 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4304 BasicType bt = Matcher::vector_element_basic_type(this); 4305 int vlen = Matcher::vector_length_in_bytes(this); 4306 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4307 %} 4308 ins_pipe( pipe_slow ); 4309 %} 4310 4311 // Replicate scalar zero to be vector 4312 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4313 predicate(Matcher::is_non_long_integral_vector(n)); 4314 match(Set dst (Replicate zero)); 4315 format %{ "replicateI $dst,$zero" %} 4316 ins_encode %{ 4317 int vlen_enc = vector_length_encoding(this); 4318 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4319 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4320 } else { 4321 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4322 } 4323 %} 4324 ins_pipe( fpu_reg_reg ); 4325 %} 4326 4327 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4328 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4329 match(Set dst (Replicate con)); 4330 format %{ "vallones $dst" %} 4331 ins_encode %{ 4332 int vector_len = vector_length_encoding(this); 4333 __ vallones($dst$$XMMRegister, vector_len); 4334 %} 4335 ins_pipe( pipe_slow ); 4336 %} 4337 4338 // ====================ReplicateL======================================= 4339 4340 #ifdef _LP64 4341 // Replicate long (8 byte) scalar to be vector 4342 instruct ReplL_reg(vec dst, rRegL src) %{ 4343 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4344 match(Set dst (Replicate src)); 4345 format %{ "replicateL $dst,$src" %} 4346 ins_encode %{ 4347 int vlen = Matcher::vector_length(this); 4348 int vlen_enc = vector_length_encoding(this); 4349 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4350 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4351 } else if (VM_Version::supports_avx2()) { 4352 __ movdq($dst$$XMMRegister, $src$$Register); 4353 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4354 } else { 4355 __ movdq($dst$$XMMRegister, $src$$Register); 4356 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4357 } 4358 %} 4359 ins_pipe( pipe_slow ); 4360 %} 4361 #else // _LP64 4362 // Replicate long (8 byte) scalar to be vector 4363 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4364 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4365 match(Set dst (Replicate src)); 4366 effect(TEMP dst, USE src, TEMP tmp); 4367 format %{ "replicateL $dst,$src" %} 4368 ins_encode %{ 4369 uint vlen = Matcher::vector_length(this); 4370 if (vlen == 2) { 4371 __ movdl($dst$$XMMRegister, $src$$Register); 4372 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4373 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4374 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4375 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4376 int vlen_enc = Assembler::AVX_256bit; 4377 __ movdl($dst$$XMMRegister, $src$$Register); 4378 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4379 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4380 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4381 } else { 4382 __ movdl($dst$$XMMRegister, $src$$Register); 4383 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4384 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4385 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4386 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4387 
} 4388 %} 4389 ins_pipe( pipe_slow ); 4390 %} 4391 4392 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4393 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4394 match(Set dst (Replicate src)); 4395 effect(TEMP dst, USE src, TEMP tmp); 4396 format %{ "replicateL $dst,$src" %} 4397 ins_encode %{ 4398 if (VM_Version::supports_avx512vl()) { 4399 __ movdl($dst$$XMMRegister, $src$$Register); 4400 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4401 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4402 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4403 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4404 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4405 } else { 4406 int vlen_enc = Assembler::AVX_512bit; 4407 __ movdl($dst$$XMMRegister, $src$$Register); 4408 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4409 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4410 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4411 } 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 #endif // _LP64 4416 4417 instruct ReplL_mem(vec dst, memory mem) %{ 4418 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4419 match(Set dst (Replicate (LoadL mem))); 4420 format %{ "replicateL $dst,$mem" %} 4421 ins_encode %{ 4422 int vlen_enc = vector_length_encoding(this); 4423 if (VM_Version::supports_avx2()) { 4424 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4425 } else if (VM_Version::supports_sse3()) { 4426 __ movddup($dst$$XMMRegister, $mem$$Address); 4427 } else { 4428 __ movq($dst$$XMMRegister, $mem$$Address); 4429 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4430 } 4431 %} 4432 ins_pipe( pipe_slow ); 4433 %} 4434 4435 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
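// (Illustration of the intended expansion, reading the encode block below:
// vreplicate_imm(T_LONG, con, 1) materializes the 8-byte immediate once in
// the constant table, and load_constant_vector then broadcasts that slot to
// the full vector width, e.g. a single vpbroadcastq from the table slot on
// AVX2 hardware; the exact instruction choice is internal to
// load_constant_vector.)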
4436 instruct ReplL_imm(vec dst, immL con) %{ 4437 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4438 match(Set dst (Replicate con)); 4439 format %{ "replicateL $dst,$con" %} 4440 ins_encode %{ 4441 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4442 int vlen = Matcher::vector_length_in_bytes(this); 4443 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4444 %} 4445 ins_pipe( pipe_slow ); 4446 %} 4447 4448 instruct ReplL_zero(vec dst, immL0 zero) %{ 4449 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4450 match(Set dst (Replicate zero)); 4451 format %{ "replicateL $dst,$zero" %} 4452 ins_encode %{ 4453 int vlen_enc = vector_length_encoding(this); 4454 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4455 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4456 } else { 4457 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4458 } 4459 %} 4460 ins_pipe( fpu_reg_reg ); 4461 %} 4462 4463 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4464 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4465 match(Set dst (Replicate con)); 4466 format %{ "vallones $dst" %} 4467 ins_encode %{ 4468 int vector_len = vector_length_encoding(this); 4469 __ vallones($dst$$XMMRegister, vector_len); 4470 %} 4471 ins_pipe( pipe_slow ); 4472 %} 4473 4474 // ====================ReplicateF======================================= 4475 4476 instruct vReplF_reg(vec dst, vlRegF src) %{ 4477 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4478 match(Set dst (Replicate src)); 4479 format %{ "replicateF $dst,$src" %} 4480 ins_encode %{ 4481 uint vlen = Matcher::vector_length(this); 4482 int vlen_enc = vector_length_encoding(this); 4483 if (vlen <= 4) { 4484 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4485 } else if (VM_Version::supports_avx2()) { 4486 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4487 } else { 4488 assert(vlen == 8, "sanity"); 4489 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4490 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4491 } 4492 %} 4493 ins_pipe( pipe_slow ); 4494 %} 4495 4496 instruct ReplF_reg(vec dst, vlRegF src) %{ 4497 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4498 match(Set dst (Replicate src)); 4499 format %{ "replicateF $dst,$src" %} 4500 ins_encode %{ 4501 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4502 %} 4503 ins_pipe( pipe_slow ); 4504 %} 4505 4506 instruct ReplF_mem(vec dst, memory mem) %{ 4507 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4508 match(Set dst (Replicate (LoadF mem))); 4509 format %{ "replicateF $dst,$mem" %} 4510 ins_encode %{ 4511 int vlen_enc = vector_length_encoding(this); 4512 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4513 %} 4514 ins_pipe( pipe_slow ); 4515 %} 4516 4517 // Replicate float scalar immediate to be vector by loading from const table. 4518 instruct ReplF_imm(vec dst, immF con) %{ 4519 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4520 match(Set dst (Replicate con)); 4521 format %{ "replicateF $dst,$con" %} 4522 ins_encode %{ 4523 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4524 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 2)); 4525 int vlen = Matcher::vector_length_in_bytes(this); 4526 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4527 %} 4528 ins_pipe( pipe_slow ); 4529 %} 4530 4531 instruct ReplF_zero(vec dst, immF0 zero) %{ 4532 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4533 match(Set dst (Replicate zero)); 4534 format %{ "replicateF $dst,$zero" %} 4535 ins_encode %{ 4536 int vlen_enc = vector_length_encoding(this); 4537 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4538 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4539 } else { 4540 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4541 } 4542 %} 4543 ins_pipe( fpu_reg_reg ); 4544 %} 4545 4546 // ====================ReplicateD======================================= 4547 4548 // Replicate double (8 bytes) scalar to be vector 4549 instruct vReplD_reg(vec dst, vlRegD src) %{ 4550 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4551 match(Set dst (Replicate src)); 4552 format %{ "replicateD $dst,$src" %} 4553 ins_encode %{ 4554 uint vlen = Matcher::vector_length(this); 4555 int vlen_enc = vector_length_encoding(this); 4556 if (vlen <= 2) { 4557 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4558 } else if (VM_Version::supports_avx2()) { 4559 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4560 } else { 4561 assert(vlen == 4, "sanity"); 4562 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4563 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4564 } 4565 %} 4566 ins_pipe( pipe_slow ); 4567 %} 4568 4569 instruct ReplD_reg(vec dst, vlRegD src) %{ 4570 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4571 match(Set dst (Replicate src)); 4572 format %{ "replicateD $dst,$src" %} 4573 ins_encode %{ 4574 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4575 %} 4576 ins_pipe( pipe_slow ); 4577 %} 4578 4579 instruct ReplD_mem(vec dst, memory mem) %{ 4580 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4581 match(Set dst (Replicate (LoadD mem))); 4582 format %{ "replicateD $dst,$mem" %} 4583 ins_encode %{ 4584 if (Matcher::vector_length(this) >= 4) { 4585 int vlen_enc = vector_length_encoding(this); 4586 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4587 } else { 4588 __ movddup($dst$$XMMRegister, $mem$$Address); 4589 } 4590 %} 4591 ins_pipe( pipe_slow ); 4592 %} 4593 4594 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
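// (As with ReplL_imm above, the 8-byte immediate is stored once and then
// broadcast by load_constant_vector; for a 256-bit vector on AVX hardware
// this plausibly becomes a single vbroadcastsd from the table slot, though
// the exact instruction selection is internal to the macro assembler.)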
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
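  // Lane-index decomposition (illustrative note, not generated code): floats
  // pack four to a 128-bit lane, so for a 256-bit vector (vlen == 8) an index
  // such as idx = 6 splits into x_idx = 6 & right_n_bits(2) = 2 (the element
  // slot within a lane) and y_idx = (6 >> 2) & 1 = 1 (the upper lane). The
  // selected lane is extracted, patched with vinsertps, and written back.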
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================
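
// Note on the two flavors below (an inference from their predicates): without
// AVX512BW, 512-bit byte operations are unavailable, so byte vectors are at
// most 32 bytes wide and are handled with AVX2 encodings that can only address
// XMM0-XMM15 -- hence the legVec operands; with AVX512BW the full EVEX
// register file is usable and plain vec operands suffice.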
#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min/Max Reduction, seeded with the +Inf/-Inf identity value in src1
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
                               legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
                              legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
                            legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                            rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
                           legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                           rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector add
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
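
// Pattern note for the ADD/SUB/MUL/DIV families in this file: each operation
// typically comes in three flavors -- a two-address SSE form (dst = dst op
// src), a three-address AVX register form, and an AVX register-memory form
// that folds the vector load and is only used for vectors wider than 8 bytes
// (see the vector_length_in_bytes(n->in(1)) > 8 predicates).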
// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // The low 64 bits of a*b decompose as ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo,
    // where a_hi/a_lo are the 32-bit halves of each 64-bit element.
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
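
// Note: only floating-point division appears below; SSE/AVX have no packed
// integer divide instruction, so integer vector division is not matched here.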
div packedF" %} 5960 ins_encode %{ 5961 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5962 %} 5963 ins_pipe( pipe_slow ); 5964 %} 5965 5966 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5967 predicate(UseAVX > 0); 5968 match(Set dst (DivVF src1 src2)); 5969 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5970 ins_encode %{ 5971 int vlen_enc = vector_length_encoding(this); 5972 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5973 %} 5974 ins_pipe( pipe_slow ); 5975 %} 5976 5977 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5978 predicate((UseAVX > 0) && 5979 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5980 match(Set dst (DivVF src (LoadVector mem))); 5981 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 5982 ins_encode %{ 5983 int vlen_enc = vector_length_encoding(this); 5984 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5985 %} 5986 ins_pipe( pipe_slow ); 5987 %} 5988 5989 // Doubles vector div 5990 instruct vdivD(vec dst, vec src) %{ 5991 predicate(UseAVX == 0); 5992 match(Set dst (DivVD dst src)); 5993 format %{ "divpd $dst,$src\t! div packedD" %} 5994 ins_encode %{ 5995 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6001 predicate(UseAVX > 0); 6002 match(Set dst (DivVD src1 src2)); 6003 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6004 ins_encode %{ 6005 int vlen_enc = vector_length_encoding(this); 6006 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6007 %} 6008 ins_pipe( pipe_slow ); 6009 %} 6010 6011 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6012 predicate((UseAVX > 0) && 6013 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6014 match(Set dst (DivVD src (LoadVector mem))); 6015 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6016 ins_encode %{ 6017 int vlen_enc = vector_length_encoding(this); 6018 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6019 %} 6020 ins_pipe( pipe_slow ); 6021 %} 6022 6023 // ------------------------------ MinMax --------------------------------------- 6024 6025 // Byte, Short, Int vector Min/Max 6026 instruct minmax_reg_sse(vec dst, vec src) %{ 6027 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6028 UseAVX == 0); 6029 match(Set dst (MinV dst src)); 6030 match(Set dst (MaxV dst src)); 6031 format %{ "vector_minmax $dst,$src\t! " %} 6032 ins_encode %{ 6033 assert(UseSSE >= 4, "required"); 6034 6035 int opcode = this->ideal_Opcode(); 6036 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6037 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6038 %} 6039 ins_pipe( pipe_slow ); 6040 %} 6041 6042 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6043 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6044 UseAVX > 0); 6045 match(Set dst (MinV src1 src2)); 6046 match(Set dst (MaxV src1 src2)); 6047 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 6048 ins_encode %{ 6049 int opcode = this->ideal_Opcode(); 6050 int vlen_enc = vector_length_encoding(this); 6051 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6052 6053 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6054 %} 6055 ins_pipe( pipe_slow ); 6056 %} 6057 6058 // Long vector Min/Max 6059 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6060 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6061 UseAVX == 0); 6062 match(Set dst (MinV dst src)); 6063 match(Set dst (MaxV src dst)); 6064 effect(TEMP dst, TEMP tmp); 6065 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6066 ins_encode %{ 6067 assert(UseSSE >= 4, "required"); 6068 6069 int opcode = this->ideal_Opcode(); 6070 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6071 assert(elem_bt == T_LONG, "sanity"); 6072 6073 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6074 %} 6075 ins_pipe( pipe_slow ); 6076 %} 6077 6078 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6079 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6080 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6081 match(Set dst (MinV src1 src2)); 6082 match(Set dst (MaxV src1 src2)); 6083 effect(TEMP dst); 6084 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6085 ins_encode %{ 6086 int vlen_enc = vector_length_encoding(this); 6087 int opcode = this->ideal_Opcode(); 6088 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6089 assert(elem_bt == T_LONG, "sanity"); 6090 6091 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6092 %} 6093 ins_pipe( pipe_slow ); 6094 %} 6095 6096 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6097 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6098 Matcher::vector_element_basic_type(n) == T_LONG); 6099 match(Set dst (MinV src1 src2)); 6100 match(Set dst (MaxV src1 src2)); 6101 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6102 ins_encode %{ 6103 assert(UseAVX > 2, "required"); 6104 6105 int vlen_enc = vector_length_encoding(this); 6106 int opcode = this->ideal_Opcode(); 6107 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6108 assert(elem_bt == T_LONG, "sanity"); 6109 6110 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6111 %} 6112 ins_pipe( pipe_slow ); 6113 %} 6114 6115 // Float/Double vector Min/Max 6116 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6117 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6118 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6119 UseAVX > 0); 6120 match(Set dst (MinV a b)); 6121 match(Set dst (MaxV a b)); 6122 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6123 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6124 ins_encode %{ 6125 assert(UseAVX > 0, "required"); 6126 6127 int opcode = this->ideal_Opcode(); 6128 int vlen_enc = vector_length_encoding(this); 6129 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6130 6131 __ vminmax_fp(opcode, elem_bt, 6132 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6133 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6134 %} 6135 ins_pipe( pipe_slow ); 6136 %} 6137 6138 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6139 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6140 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6141 match(Set dst (MinV a b)); 6142 match(Set dst (MaxV a b)); 6143 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6144 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6145 ins_encode %{ 6146 assert(UseAVX > 2, "required"); 6147 6148 int opcode = this->ideal_Opcode(); 6149 int vlen_enc = vector_length_encoding(this); 6150 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6151 6152 __ evminmax_fp(opcode, elem_bt, 6153 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6154 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6155 %} 6156 ins_pipe( pipe_slow ); 6157 %} 6158 6159 // --------------------------------- Signum/CopySign --------------------------- 6160 6161 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6162 match(Set dst (SignumF dst (Binary zero one))); 6163 effect(KILL cr); 6164 format %{ "signumF $dst, $dst" %} 6165 ins_encode %{ 6166 int opcode = this->ideal_Opcode(); 6167 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6168 %} 6169 ins_pipe( pipe_slow ); 6170 %} 6171 6172 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6173 match(Set dst (SignumD dst (Binary zero one))); 6174 effect(KILL cr); 6175 format %{ "signumD $dst, $dst" %} 6176 ins_encode %{ 6177 int opcode = this->ideal_Opcode(); 6178 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6179 %} 6180 ins_pipe( pipe_slow ); 6181 %} 6182 6183 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6184 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6185 match(Set dst (SignumVF src (Binary zero one))); 6186 match(Set dst (SignumVD src (Binary zero one))); 6187 effect(TEMP dst, TEMP xtmp1); 6188 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6189 ins_encode %{ 6190 int opcode = this->ideal_Opcode(); 6191 int vec_enc = vector_length_encoding(this); 6192 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6193 $xtmp1$$XMMRegister, vec_enc); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6199 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6200 match(Set dst (SignumVF src (Binary zero one))); 6201 match(Set dst (SignumVD src (Binary zero one))); 6202 effect(TEMP dst, TEMP ktmp1); 6203 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6204 ins_encode %{ 6205 int opcode = this->ideal_Opcode(); 6206 int vec_enc = vector_length_encoding(this); 6207 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6208 $ktmp1$$KRegister, vec_enc); 6209 %} 6210 ins_pipe( pipe_slow ); 6211 %} 6212 6213 // --------------------------------------- 6214 // For copySign use 0xE4 as writemask for vpternlog 6215 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6216 // C (xmm2) is set to 0x7FFFFFFF 6217 // Wherever xmm2 is 0, we want to pick from B (sign) 6218 // Wherever xmm2 is 1, we want to pick from A (src) 6219 // 6220 // A B C Result 6221 // 0 0 0 0 6222 // 0 0 1 0 6223 // 0 1 0 1 6224 // 0 1 1 0 6225 // 1 0 0 0 6226 // 1 0 1 1 6227 // 1 1 0 1 6228 // 1 1 1 1 6229 // 6230 // Result going from high bit to low bit is 0x11100100 = 0xe4 6231 // --------------------------------------- 6232 6233 #ifdef _LP64 6234 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6235 match(Set dst (CopySignF dst src)); 6236 effect(TEMP tmp1, TEMP tmp2); 6237 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6238 ins_encode %{ 6239 __ movl($tmp2$$Register, 0x7FFFFFFF); 6240 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6241 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6242 %} 6243 ins_pipe( pipe_slow ); 6244 %} 6245 6246 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6247 match(Set dst (CopySignD dst (Binary src zero))); 6248 ins_cost(100); 6249 effect(TEMP tmp1, TEMP tmp2); 6250 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6251 ins_encode %{ 6252 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6253 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6254 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6255 %} 6256 ins_pipe( pipe_slow ); 6257 %} 6258 6259 #endif // _LP64 6260 6261 //----------------------------- CompressBits/ExpandBits ------------------------ 6262 6263 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6264 predicate(n->bottom_type()->isa_int()); 6265 match(Set dst (CompressBits src mask)); 6266 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6267 ins_encode %{ 6268 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6274 predicate(n->bottom_type()->isa_int()); 6275 match(Set dst (ExpandBits src mask)); 6276 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6277 ins_encode %{ 6278 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6279 %} 6280 ins_pipe( pipe_slow ); 6281 %} 6282 6283 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6284 predicate(n->bottom_type()->isa_int()); 6285 match(Set dst (CompressBits src (LoadI mask))); 6286 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6287 ins_encode %{ 6288 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6289 %} 6290 ins_pipe( pipe_slow ); 6291 %} 6292 6293 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6294 predicate(n->bottom_type()->isa_int()); 6295 match(Set dst (ExpandBits src (LoadI mask))); 6296 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6297 ins_encode %{ 6298 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6299 %} 6300 ins_pipe( pipe_slow ); 6301 %} 6302 6303 // --------------------------------- Sqrt -------------------------------------- 6304 6305 instruct vsqrtF_reg(vec dst, vec src) %{ 6306 match(Set dst (SqrtVF src)); 6307 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6308 ins_encode %{ 6309 assert(UseAVX > 0, "required"); 6310 int vlen_enc = vector_length_encoding(this); 6311 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6312 %} 6313 ins_pipe( pipe_slow ); 6314 %} 6315 6316 instruct vsqrtF_mem(vec dst, memory mem) %{ 6317 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6318 match(Set dst (SqrtVF (LoadVector mem))); 6319 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6320 ins_encode %{ 6321 assert(UseAVX > 0, "required"); 6322 int vlen_enc = vector_length_encoding(this); 6323 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6324 %} 6325 ins_pipe( pipe_slow ); 6326 %} 6327 6328 // Floating point vector sqrt 6329 instruct vsqrtD_reg(vec dst, vec src) %{ 6330 match(Set dst (SqrtVD src)); 6331 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6332 ins_encode %{ 6333 assert(UseAVX > 0, "required"); 6334 int vlen_enc = vector_length_encoding(this); 6335 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6336 %} 6337 ins_pipe( pipe_slow ); 6338 %} 6339 6340 instruct vsqrtD_mem(vec dst, memory mem) %{ 6341 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6342 match(Set dst (SqrtVD (LoadVector mem))); 6343 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6344 ins_encode %{ 6345 assert(UseAVX > 0, "required"); 6346 int vlen_enc = vector_length_encoding(this); 6347 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6348 %} 6349 ins_pipe( pipe_slow ); 6350 %} 6351 6352 // ------------------------------ Shift --------------------------------------- 6353 6354 // Left and right shift count vectors are the same on x86 6355 // (only lowest bits of xmm reg are used for count). 6356 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6357 match(Set dst (LShiftCntV cnt)); 6358 match(Set dst (RShiftCntV cnt)); 6359 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6360 ins_encode %{ 6361 __ movdl($dst$$XMMRegister, $cnt$$Register); 6362 %} 6363 ins_pipe( pipe_slow ); 6364 %} 6365 6366 // Byte vector shift 6367 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6368 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6369 match(Set dst ( LShiftVB src shift)); 6370 match(Set dst ( RShiftVB src shift)); 6371 match(Set dst (URShiftVB src shift)); 6372 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6373 format %{"vector_byte_shift $dst,$src,$shift" %} 6374 ins_encode %{ 6375 assert(UseSSE > 3, "required"); 6376 int opcode = this->ideal_Opcode(); 6377 bool sign = (opcode != Op_URShiftVB); 6378 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6379 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6380 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6381 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6382 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6383 %} 6384 ins_pipe( pipe_slow ); 6385 %} 6386 6387 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6388 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6389 UseAVX <= 1); 6390 match(Set dst ( LShiftVB src shift)); 6391 match(Set dst ( RShiftVB src shift)); 6392 match(Set dst (URShiftVB src shift)); 6393 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6394 format %{"vector_byte_shift $dst,$src,$shift" %} 6395 ins_encode %{ 6396 assert(UseSSE > 3, "required"); 6397 int opcode = this->ideal_Opcode(); 6398 bool sign = (opcode != Op_URShiftVB); 6399 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6400 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6401 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6402 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6403 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6404 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6405 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6406 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6407 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6408 %} 6409 ins_pipe( pipe_slow ); 6410 %} 6411 6412 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6413 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6414 UseAVX > 1); 6415 match(Set dst ( LShiftVB src shift)); 6416 match(Set dst ( RShiftVB src shift)); 6417 match(Set dst (URShiftVB src shift)); 6418 effect(TEMP dst, TEMP tmp); 6419 format %{"vector_byte_shift $dst,$src,$shift" %} 6420 ins_encode %{ 6421 int opcode = this->ideal_Opcode(); 6422 bool sign = (opcode != Op_URShiftVB); 6423 int vlen_enc = Assembler::AVX_256bit; 6424 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6425 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6426 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6427 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6428 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6429 %} 6430 ins_pipe( pipe_slow ); 6431 %} 6432 6433 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6434 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6435 match(Set dst ( LShiftVB src shift)); 6436 match(Set dst ( RShiftVB src shift)); 6437 match(Set dst (URShiftVB src shift)); 6438 effect(TEMP 
dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts short values to int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
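// A worked example of the point above (an editor's illustration):
//
//   short s = -2;       // bit pattern 0xFFFE
//   int r = s >>> 1;    // Java widens first: 0xFFFFFFFE >>> 1 = 0x7FFFFFFF,
//                       // while a packed 16-bit shift would give 0x7FFF
//   char c = 0xFFFE;
//   int t = c >>> 1;    // chars widen zero-extended: 0x0000FFFE >>> 1 = 0x7FFF,
//                       // matching the packed 16-bit shift exactly
//
// so a packed 16-bit logical right shift is only safe for char data.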
shift packedS" %} 6498 ins_encode %{ 6499 int opcode = this->ideal_Opcode(); 6500 if (UseAVX > 0) { 6501 int vlen_enc = vector_length_encoding(this); 6502 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6503 } else { 6504 int vlen = Matcher::vector_length(this); 6505 if (vlen == 2) { 6506 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6507 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6508 } else if (vlen == 4) { 6509 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6510 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6511 } else { 6512 assert (vlen == 8, "sanity"); 6513 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6514 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6515 } 6516 } 6517 %} 6518 ins_pipe( pipe_slow ); 6519 %} 6520 6521 // Integers vector left shift 6522 instruct vshiftI(vec dst, vec src, vec shift) %{ 6523 predicate(!n->as_ShiftV()->is_var_shift()); 6524 match(Set dst ( LShiftVI src shift)); 6525 match(Set dst ( RShiftVI src shift)); 6526 match(Set dst (URShiftVI src shift)); 6527 effect(TEMP dst, USE src, USE shift); 6528 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6529 ins_encode %{ 6530 int opcode = this->ideal_Opcode(); 6531 if (UseAVX > 0) { 6532 int vlen_enc = vector_length_encoding(this); 6533 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6534 } else { 6535 int vlen = Matcher::vector_length(this); 6536 if (vlen == 2) { 6537 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6538 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6539 } else { 6540 assert(vlen == 4, "sanity"); 6541 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6542 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6543 } 6544 } 6545 %} 6546 ins_pipe( pipe_slow ); 6547 %} 6548 6549 // Integers vector left constant shift 6550 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6551 match(Set dst (LShiftVI src (LShiftCntV shift))); 6552 match(Set dst (RShiftVI src (RShiftCntV shift))); 6553 match(Set dst (URShiftVI src (RShiftCntV shift))); 6554 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6555 ins_encode %{ 6556 int opcode = this->ideal_Opcode(); 6557 if (UseAVX > 0) { 6558 int vector_len = vector_length_encoding(this); 6559 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6560 } else { 6561 int vlen = Matcher::vector_length(this); 6562 if (vlen == 2) { 6563 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6564 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6565 } else { 6566 assert(vlen == 4, "sanity"); 6567 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6568 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6569 } 6570 } 6571 %} 6572 ins_pipe( pipe_slow ); 6573 %} 6574 6575 // Longs vector shift 6576 instruct vshiftL(vec dst, vec src, vec shift) %{ 6577 predicate(!n->as_ShiftV()->is_var_shift()); 6578 match(Set dst ( LShiftVL src shift)); 6579 match(Set dst (URShiftVL src shift)); 6580 effect(TEMP dst, USE src, USE shift); 6581 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6582 ins_encode %{ 6583 int opcode = this->ideal_Opcode(); 6584 if (UseAVX > 0) { 6585 int vlen_enc = vector_length_encoding(this); 6586 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6587 } else { 6588 assert(Matcher::vector_length(this) == 2, ""); 6589 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6590 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6591 } 6592 %} 6593 ins_pipe( pipe_slow ); 6594 %} 6595 6596 // Longs vector constant shift 6597 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6598 match(Set dst (LShiftVL src (LShiftCntV shift))); 6599 match(Set dst (URShiftVL src (RShiftCntV shift))); 6600 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6601 ins_encode %{ 6602 int opcode = this->ideal_Opcode(); 6603 if (UseAVX > 0) { 6604 int vector_len = vector_length_encoding(this); 6605 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6606 } else { 6607 assert(Matcher::vector_length(this) == 2, ""); 6608 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6609 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6610 } 6611 %} 6612 ins_pipe( pipe_slow ); 6613 %} 6614 6615 // -------------------ArithmeticRightShift ----------------------------------- 6616 // Long vector arithmetic right shift 6617 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6618 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6619 match(Set dst (RShiftVL src shift)); 6620 effect(TEMP dst, TEMP tmp); 6621 format %{ "vshiftq $dst,$src,$shift" %} 6622 ins_encode %{ 6623 uint vlen = Matcher::vector_length(this); 6624 if (vlen == 2) { 6625 assert(UseSSE >= 2, "required"); 6626 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6627 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6628 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6629 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6630 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6631 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6632 } else { 6633 assert(vlen == 4, "sanity"); 6634 assert(UseAVX > 1, "required"); 6635 int vlen_enc = Assembler::AVX_256bit; 6636 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6637 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6638 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6639 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6640 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6641 } 6642 %} 6643 ins_pipe( pipe_slow ); 6644 %} 6645 6646 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6647 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6648 match(Set dst (RShiftVL src shift)); 6649 format %{ "vshiftq $dst,$src,$shift" %} 6650 ins_encode %{ 6651 int vlen_enc = vector_length_encoding(this); 6652 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6653 %} 6654 ins_pipe( pipe_slow ); 6655 %} 6656 6657 // ------------------- Variable Shift ----------------------------- 6658 // Byte variable shift 6659 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6660 predicate(Matcher::vector_length(n) <= 8 && 6661 n->as_ShiftV()->is_var_shift() && 6662 !VM_Version::supports_avx512bw()); 6663 match(Set dst ( LShiftVB src shift)); 6664 match(Set dst ( RShiftVB src shift)); 6665 match(Set dst (URShiftVB src shift)); 
6666 effect(TEMP dst, TEMP vtmp); 6667 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6668 ins_encode %{ 6669 assert(UseAVX >= 2, "required"); 6670 6671 int opcode = this->ideal_Opcode(); 6672 int vlen_enc = Assembler::AVX_128bit; 6673 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6674 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6675 %} 6676 ins_pipe( pipe_slow ); 6677 %} 6678 6679 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6680 predicate(Matcher::vector_length(n) == 16 && 6681 n->as_ShiftV()->is_var_shift() && 6682 !VM_Version::supports_avx512bw()); 6683 match(Set dst ( LShiftVB src shift)); 6684 match(Set dst ( RShiftVB src shift)); 6685 match(Set dst (URShiftVB src shift)); 6686 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6687 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6688 ins_encode %{ 6689 assert(UseAVX >= 2, "required"); 6690 6691 int opcode = this->ideal_Opcode(); 6692 int vlen_enc = Assembler::AVX_128bit; 6693 // Shift lower half and get word result in dst 6694 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6695 6696 // Shift upper half and get word result in vtmp1 6697 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6698 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6699 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6700 6701 // Merge and down convert the two word results to byte in dst 6702 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6703 %} 6704 ins_pipe( pipe_slow ); 6705 %} 6706 6707 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 6708 predicate(Matcher::vector_length(n) == 32 && 6709 n->as_ShiftV()->is_var_shift() && 6710 !VM_Version::supports_avx512bw()); 6711 match(Set dst ( LShiftVB src shift)); 6712 match(Set dst ( RShiftVB src shift)); 6713 match(Set dst (URShiftVB src shift)); 6714 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 6715 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 6716 ins_encode %{ 6717 assert(UseAVX >= 2, "required"); 6718 6719 int opcode = this->ideal_Opcode(); 6720 int vlen_enc = Assembler::AVX_128bit; 6721 // Process lower 128 bits and get result in dst 6722 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6723 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6724 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6725 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6726 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6727 6728 // Process higher 128 bits and get result in vtmp3 6729 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6730 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6731 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 6732 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6733 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6734 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Short variable shift
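// Without AVX512BW there is no 16-bit variable-shift instruction, so the
// _nobw patterns below widen shorts to 32-bit lanes, use the AVX2 dword
// variable shifts (vpsllvd/vpsrlvd/vpsravd), mask back to 16 bits and
// repack. Roughly, per element (an editor's sketch, not part of the build):
//
//   int wide    = sign ? (int)s : (s & 0xFFFF);  // vextendwd / vpmovzxwd
//   int shifted = shift_op(wide, count);         // vpsllvd / vpsravd / vpsrlvd
//   short out   = (short)(shifted & 0xFFFF);     // vpand + vpackusdw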
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
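// Note: only the left and logical right variable shifts are matched here.
// AVX2 provides vpsllvq and vpsrlvq but no vpsravq, so the arithmetic right
// shift (RShiftVL) is handled by the separate patterns below: an emulation
// with a temp vector on AVX2, or vpsravq directly on AVX-512.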
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift arithmetic
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 6981 ins_encode %{ 6982 int vlen_enc = vector_length_encoding(this); 6983 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6984 %} 6985 ins_pipe( pipe_slow ); 6986 %} 6987 6988 instruct vor_mem(vec dst, vec src, memory mem) %{ 6989 predicate((UseAVX > 0) && 6990 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6991 match(Set dst (OrV src (LoadVector mem))); 6992 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6993 ins_encode %{ 6994 int vlen_enc = vector_length_encoding(this); 6995 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6996 %} 6997 ins_pipe( pipe_slow ); 6998 %} 6999 7000 // --------------------------------- XOR -------------------------------------- 7001 7002 instruct vxor(vec dst, vec src) %{ 7003 predicate(UseAVX == 0); 7004 match(Set dst (XorV dst src)); 7005 format %{ "pxor $dst,$src\t! xor vectors" %} 7006 ins_encode %{ 7007 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7008 %} 7009 ins_pipe( pipe_slow ); 7010 %} 7011 7012 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7013 predicate(UseAVX > 0); 7014 match(Set dst (XorV src1 src2)); 7015 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7016 ins_encode %{ 7017 int vlen_enc = vector_length_encoding(this); 7018 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7019 %} 7020 ins_pipe( pipe_slow ); 7021 %} 7022 7023 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7024 predicate((UseAVX > 0) && 7025 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7026 match(Set dst (XorV src (LoadVector mem))); 7027 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7028 ins_encode %{ 7029 int vlen_enc = vector_length_encoding(this); 7030 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7031 %} 7032 ins_pipe( pipe_slow ); 7033 %} 7034 7035 // --------------------------------- VectorCast -------------------------------------- 7036 7037 instruct vcastBtoX(vec dst, vec src) %{ 7038 match(Set dst (VectorCastB2X src)); 7039 format %{ "vector_cast_b2x $dst,$src\t!" %} 7040 ins_encode %{ 7041 assert(UseAVX > 0, "required"); 7042 7043 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7044 int vlen_enc = vector_length_encoding(this); 7045 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7046 %} 7047 ins_pipe( pipe_slow ); 7048 %} 7049 7050 instruct castStoX(vec dst, vec src) %{ 7051 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7052 Matcher::vector_length(n->in(1)) <= 8 && // src 7053 Matcher::vector_element_basic_type(n) == T_BYTE); 7054 match(Set dst (VectorCastS2X src)); 7055 format %{ "vector_cast_s2x $dst,$src" %} 7056 ins_encode %{ 7057 assert(UseAVX > 0, "required"); 7058 7059 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7060 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7061 %} 7062 ins_pipe( pipe_slow ); 7063 %} 7064 7065 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7066 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7067 Matcher::vector_length(n->in(1)) == 16 && // src 7068 Matcher::vector_element_basic_type(n) == T_BYTE); 7069 effect(TEMP dst, TEMP vtmp); 7070 match(Set dst (VectorCastS2X src)); 7071 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7072 ins_encode %{ 7073 assert(UseAVX > 0, "required"); 7074 7075 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7076 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7077 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7078 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7079 %} 7080 ins_pipe( pipe_slow ); 7081 %} 7082 7083 instruct vcastStoX_evex(vec dst, vec src) %{ 7084 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7085 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7086 match(Set dst (VectorCastS2X src)); 7087 format %{ "vector_cast_s2x $dst,$src\t!" %} 7088 ins_encode %{ 7089 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7090 int src_vlen_enc = vector_length_encoding(this, $src); 7091 int vlen_enc = vector_length_encoding(this); 7092 switch (to_elem_bt) { 7093 case T_BYTE: 7094 if (!VM_Version::supports_avx512vl()) { 7095 vlen_enc = Assembler::AVX_512bit; 7096 } 7097 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7098 break; 7099 case T_INT: 7100 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7101 break; 7102 case T_FLOAT: 7103 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7104 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7105 break; 7106 case T_LONG: 7107 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7108 break; 7109 case T_DOUBLE: { 7110 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7111 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7112 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7113 break; 7114 } 7115 default: 7116 ShouldNotReachHere(); 7117 } 7118 %} 7119 ins_pipe( pipe_slow ); 7120 %} 7121 7122 instruct castItoX(vec dst, vec src) %{ 7123 predicate(UseAVX <= 2 && 7124 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7125 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7126 match(Set dst (VectorCastI2X src)); 7127 format %{ "vector_cast_i2x $dst,$src" %} 7128 ins_encode %{ 7129 assert(UseAVX > 0, "required"); 7130 7131 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7132 int vlen_enc = vector_length_encoding(this, $src); 7133 7134 if (to_elem_bt == T_BYTE) { 7135 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7136 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7137 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7138 } else { 7139 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7140 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7141 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7142 } 7143 %} 7144 ins_pipe( pipe_slow ); 7145 %} 7146 7147 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7148 predicate(UseAVX <= 2 && 7149 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7150 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7151 match(Set dst (VectorCastI2X src)); 7152 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7153 effect(TEMP dst, TEMP vtmp); 7154 ins_encode %{ 7155 assert(UseAVX > 0, "required"); 7156 7157 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7158 int vlen_enc = vector_length_encoding(this, $src); 7159 7160 if (to_elem_bt == T_BYTE) { 7161 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7162 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7163 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7164 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7165 } else { 7166 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7167 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7168 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7169 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7170 } 7171 %} 7172 ins_pipe( pipe_slow ); 7173 %} 7174 7175 instruct vcastItoX_evex(vec dst, vec src) %{ 7176 predicate(UseAVX > 2 || 7177 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7178 match(Set dst (VectorCastI2X src)); 7179 format %{ "vector_cast_i2x $dst,$src\t!" %} 7180 ins_encode %{ 7181 assert(UseAVX > 0, "required"); 7182 7183 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7184 int src_vlen_enc = vector_length_encoding(this, $src); 7185 int dst_vlen_enc = vector_length_encoding(this); 7186 switch (dst_elem_bt) { 7187 case T_BYTE: 7188 if (!VM_Version::supports_avx512vl()) { 7189 src_vlen_enc = Assembler::AVX_512bit; 7190 } 7191 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7192 break; 7193 case T_SHORT: 7194 if (!VM_Version::supports_avx512vl()) { 7195 src_vlen_enc = Assembler::AVX_512bit; 7196 } 7197 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7198 break; 7199 case T_FLOAT: 7200 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7201 break; 7202 case T_LONG: 7203 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7204 break; 7205 case T_DOUBLE: 7206 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7207 break; 7208 default: 7209 ShouldNotReachHere(); 7210 } 7211 %} 7212 ins_pipe( pipe_slow ); 7213 %} 7214 7215 instruct vcastLtoBS(vec dst, vec src) %{ 7216 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7217 UseAVX <= 2); 7218 match(Set dst (VectorCastL2X src)); 7219 format %{ "vector_cast_l2x $dst,$src" %} 7220 ins_encode %{ 7221 assert(UseAVX > 0, "required"); 7222 7223 int vlen = Matcher::vector_length_in_bytes(this, $src); 7224 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7225 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7226 : ExternalAddress(vector_int_to_short_mask()); 7227 if (vlen <= 16) { 7228 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7229 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7230 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7231 } else { 7232 assert(vlen <= 32, "required"); 7233 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7234 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7235 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7236 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7237 } 7238 if (to_elem_bt == T_BYTE) { 7239 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7240 } 7241 %} 7242 ins_pipe( pipe_slow ); 7243 %} 7244 7245 instruct vcastLtoX_evex(vec dst, vec src) %{ 7246 predicate(UseAVX > 2 || 7247 (Matcher::vector_element_basic_type(n) == T_INT || 7248 Matcher::vector_element_basic_type(n) == T_FLOAT || 7249 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7250 match(Set dst (VectorCastL2X src)); 7251 format %{ "vector_cast_l2x $dst,$src\t!" %} 7252 ins_encode %{ 7253 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7254 int vlen = Matcher::vector_length_in_bytes(this, $src); 7255 int vlen_enc = vector_length_encoding(this, $src); 7256 switch (to_elem_bt) { 7257 case T_BYTE: 7258 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7259 vlen_enc = Assembler::AVX_512bit; 7260 } 7261 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7262 break; 7263 case T_SHORT: 7264 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7265 vlen_enc = Assembler::AVX_512bit; 7266 } 7267 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7268 break; 7269 case T_INT: 7270 if (vlen == 8) { 7271 if ($dst$$XMMRegister != $src$$XMMRegister) { 7272 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7273 } 7274 } else if (vlen == 16) { 7275 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7276 } else if (vlen == 32) { 7277 if (UseAVX > 2) { 7278 if (!VM_Version::supports_avx512vl()) { 7279 vlen_enc = Assembler::AVX_512bit; 7280 } 7281 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7282 } else { 7283 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7284 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7285 } 7286 } else { // vlen == 64 7287 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7288 } 7289 break; 7290 case T_FLOAT: 7291 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7292 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7293 break; 7294 case T_DOUBLE: 7295 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7296 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7297 break; 7298 7299 default: assert(false, "%s", type2name(to_elem_bt)); 7300 } 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 instruct vcastFtoD_reg(vec dst, vec src) %{ 7306 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7307 match(Set dst (VectorCastF2X src)); 7308 format %{ "vector_cast_f2d $dst,$src\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load
    // greater-than-32-bit addresses in register-indirect addressing mode, since
    // stub constants are part of the code cache and there is currently a cap of
    // 2G on ReservedCodeCacheSize. Targets are free to increase this limit, but
    // a code cache larger than 2G looks unreasonable in practical scenarios. On
    // the plus side, with the given cap we save a temporary register allocation,
    // which in the limiting case can prevent spilling in high register pressure
    // blocks.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7367 ins_encode %{ 7368 int vlen_enc = vector_length_encoding(this, $src); 7369 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7370 %} 7371 ins_pipe( pipe_slow ); 7372 %} 7373 7374 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7375 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7376 is_integral_type(Matcher::vector_element_basic_type(n))); 7377 match(Set dst (VectorCastD2X src)); 7378 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7379 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7380 ins_encode %{ 7381 int vlen_enc = vector_length_encoding(this, $src); 7382 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7383 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7384 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7385 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7386 %} 7387 ins_pipe( pipe_slow ); 7388 %} 7389 7390 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7391 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7392 is_integral_type(Matcher::vector_element_basic_type(n))); 7393 match(Set dst (VectorCastD2X src)); 7394 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7395 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7396 ins_encode %{ 7397 int vlen_enc = vector_length_encoding(this, $src); 7398 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7399 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7400 ExternalAddress(vector_float_signflip()); 7401 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7402 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7403 %} 7404 ins_pipe( pipe_slow ); 7405 %} 7406 7407 instruct vucast(vec dst, vec src) %{ 7408 match(Set dst (VectorUCastB2X src)); 7409 match(Set dst (VectorUCastS2X src)); 7410 match(Set dst (VectorUCastI2X src)); 7411 format %{ "vector_ucast $dst,$src\t!" %} 7412 ins_encode %{ 7413 assert(UseAVX > 0, "required"); 7414 7415 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7416 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7417 int vlen_enc = vector_length_encoding(this); 7418 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7419 %} 7420 ins_pipe( pipe_slow ); 7421 %} 7422 7423 #ifdef _LP64 7424 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7425 predicate(!VM_Version::supports_avx512vl() && 7426 Matcher::vector_length_in_bytes(n) < 64 && 7427 Matcher::vector_element_basic_type(n) == T_INT); 7428 match(Set dst (RoundVF src)); 7429 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7430 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7431 ins_encode %{ 7432 int vlen_enc = vector_length_encoding(this); 7433 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 

#ifdef _LP64
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64
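
// Note on the MXCSR constants used by the RoundV* patterns above: the x86
// reset default is 0x1F80 (all six exception mask bits set, RC = round to
// nearest). 0x3F80 is 0x1F80 with the rounding-control field (bits 14:13)
// changed to 01, i.e. round toward negative infinity, so Java's documented
// Math.round(x) == floor(x + 0.5) can be computed as a plain convert of
// x + 0.5. 0x3FBF additionally pre-sets the six sticky exception flag bits
// (an E-core tuning detail). A rough scalar model of the documented
// semantics (hypothetical helper, illustrative only):
//
//   #include <cmath>
//   static jint java_round_float(jfloat f) {
//     // floor(f + 0.5f), then the saturating f2i sketched earlier;
//     // NaN propagates through and becomes 0.
//     return java_f2i((jfloat)std::floor(f + 0.5f));
//   }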

// --------------------------------- VectorMaskCmp --------------------------------------

instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
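
// Note: pre-AVX512 x86 has no unsigned vector compares, so vcmpu above flips
// the sign bit of both operands and then compares signed:
// x <u y  iff  (x ^ SIGN_BIT) <s (y ^ SIGN_BIT). A minimal scalar sketch of
// the identity (illustrative only, not part of the build):
//
//   static bool unsigned_less(uint32_t x, uint32_t y) {
//     int32_t xs = (int32_t)(x ^ 0x80000000u);  // bias by flipping sign bit
//     int32_t ys = (int32_t)(y ^ 0x80000000u);
//     return xs < ys;  // signed compare now matches the unsigned ordering
//   }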

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare directly into the mask register, dispatching on the source element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
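
// Note: the wide extract patterns (vextractI above, vextractL/F/D below) are
// assumed to split the element index in two: get_lane picks the 128-bit lane
// holding the element and get_elem extracts within that lane. A sketch of
// the index arithmetic under that assumption (illustrative only):
//
//   static void lane_split(int idx, int esize, int* lane, int* sub_idx) {
//     int per_lane = 16 / esize;  // elements per 128-bit lane
//     *lane    = idx / per_lane;  // which lane to extract (vextracti128 etc.)
//     *sub_idx = idx % per_lane;  // position within the lane (pextr*)
//   }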

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
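
// Note: the E-core variant above (vblendvp) implements blend as the classic
// bitwise select, which is correct because VectorBlend masks are all-ones or
// all-zeros per lane. A minimal scalar sketch (illustrative only):
//
//   static uint32_t bitwise_select(uint32_t mask, uint32_t a, uint32_t b) {
//     return (mask & b) | (~mask & a);  // all-ones -> b, all-zeros -> a
//   }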

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

#ifdef _LP64
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
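
// Note: for mask lengths below the kortest granularity, the ktest patterns
// above move the mask into a GPR and operate on the low 'masklen' bits:
// anytrue means "some live bit set", alltrue means "all live bits set".
// A scalar sketch of the flag computations (illustrative only):
//
//   static bool ktest_alltrue(uint32_t kreg_bits, int masklen) {
//     uint32_t live = (1u << masklen) - 1;  // mask of live lanes
//     return (kreg_bits & live) == live;    // cmpl sets ZF on equality
//   }
//   static bool ktest_anytrue(uint32_t kreg_bits, int masklen) {
//     return (kreg_bits & ((1u << masklen) - 1)) != 0;  // andl sets ZF
//   }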

//------------------------------------- LoadMask --------------------------------------------

instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
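
// Note: a vector mask is held internally as all-ones (-1) or all-zeros per
// element, while VectorStoreMask must produce one byte of value 1 or 0 per
// element. The patterns above therefore narrow each element to a byte (pack
// or evpmov*b) and finish with a byte-wise absolute value, which maps 0xFF
// to 0x01. A scalar sketch of the per-element transform (illustrative only):
//
//   static uint8_t store_mask_byte(int64_t elem) {
//     int8_t b = (int8_t)elem;           // narrow: -1 or 0 survives
//     return (uint8_t)(b < 0 ? -b : b);  // pabsb: 0xFF -> 0x01, 0x00 -> 0x00
//   }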

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif
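
// Note: PopulateIndex produces the arithmetic sequence {start, start + 1,
// start + 2, ...}; the patterns above compose it from a broadcast of the
// scalar start value plus the iota constant {0, 1, 2, ...}. A scalar sketch
// (illustrative only):
//
//   static void populate_index(int* dst, int start, int vlen) {
//     for (int i = 0; i < vlen; i++) {
//       dst[i] = start + i;  // vpbroadcast(start) + load_iota_indices
//     }
//   }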

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

instruct loadShuffleB(vec dst) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorLoadShuffle dst));
  format %{ "vector_load_shuffle $dst, $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}
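
// Note: AVX2's vpshufb only shuffles within 128-bit lanes, so rearrangeB_avx
// above builds a full 256-bit byte rearrange from two in-lane shuffles (one
// of src and one of a lane-swapped copy of src), blended on whether each
// index points into the destination byte's own lane. This assumes the
// vector_byte_shufflemask constant is laid out so that the vpaddb sets the
// high bit exactly for cross-lane indices. A scalar model of the end result
// (illustrative only):
//
//   static uint8_t rearrange_byte_256(const uint8_t src[32],
//                                     const uint8_t idx[32], int pos) {
//     int src_lane = idx[pos] / 16;     // lane the index points into
//     int sub      = idx[pos] % 16;     // offset within that lane
//     return src[src_lane * 16 + sub];  // self-lane shuffle result, or the
//   }                                   // swapped-lane result via the blend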

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
      __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
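
// Note: with no 16-bit shuffle available, loadShuffleS above rewrites each
// short index s as the byte-index pair {2*s, 2*s + 1} so that a plain byte
// shuffle (pshufb) moves whole little-endian shorts. A scalar sketch of the
// mask expansion (illustrative only):
//
//   static void expand_short_shuffle(const uint8_t* s, uint8_t* b, int n) {
//     for (int i = 0; i < n; i++) {  // n = number of short elements
//       b[2 * i]     = (uint8_t)(2 * s[i]);      // low byte of short s[i]
//       b[2 * i + 1] = (uint8_t)(2 * s[i] + 1);  // high byte of short s[i]
//     }
//   }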

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
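
// Note: FmaV* requires the fused semantics of Math.fma: a * b + c rounded
// once from the infinitely precise intermediate, which is exactly what the
// vfmadd-family instructions provide (hence the UseFMA guard rather than a
// mul/add fallback). A scalar sketch of the contract (illustrative only):
//
//   #include <cmath>
//   static double java_fma(double a, double b, double c) {
//     return std::fma(a, b, c);  // one rounding, not round(a * b) then + c
//   }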
enabled"); 8711 int vlen_enc = vector_length_encoding(this); 8712 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8718 match(Set c (FmaVD c (Binary a b))); 8719 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8720 ins_cost(150); 8721 ins_encode %{ 8722 assert(UseFMA, "not enabled"); 8723 int vlen_enc = vector_length_encoding(this); 8724 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8725 %} 8726 ins_pipe( pipe_slow ); 8727 %} 8728 8729 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8730 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8731 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8732 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8733 ins_cost(150); 8734 ins_encode %{ 8735 assert(UseFMA, "not enabled"); 8736 int vlen_enc = vector_length_encoding(this); 8737 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8738 %} 8739 ins_pipe( pipe_slow ); 8740 %} 8741 8742 // --------------------------------- Vector Multiply Add -------------------------------------- 8743 8744 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8745 predicate(UseAVX == 0); 8746 match(Set dst (MulAddVS2VI dst src1)); 8747 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8748 ins_encode %{ 8749 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8750 %} 8751 ins_pipe( pipe_slow ); 8752 %} 8753 8754 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8755 predicate(UseAVX > 0); 8756 match(Set dst (MulAddVS2VI src1 src2)); 8757 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8758 ins_encode %{ 8759 int vlen_enc = vector_length_encoding(this); 8760 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8761 %} 8762 ins_pipe( pipe_slow ); 8763 %} 8764 8765 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8766 8767 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8768 predicate(VM_Version::supports_avx512_vnni()); 8769 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8770 format %{ "evpdpwssd $dst,$src1,$src2\t! 

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
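
// Editor's note: on AVX512_VNNI hardware evpdpwssd fuses the vpmaddwd multiply-add
// above with the following vpaddd accumulate into one instruction
// (dst[i] += src1[2i]*src2[2i] + src1[2i+1]*src2[2i+1]), hence the low ins_cost
// that makes the matcher prefer this rule over the two-instruction sequence.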

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
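
// Editor's note: vector popcount replaces each lane with the number of set bits
// in that lane, e.g. an int lane holding 0b1011 (11) becomes 3; in the masked
// variant only lanes whose mask bit is set are rewritten.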

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
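
// Editor's note: per-lane trailing zero count, e.g. an int lane holding 40
// (0b101000) yields 3, and a zero lane yields the full lane width in bits,
// matching Integer/Long.numberOfTrailingZeros.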

// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
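
// Editor's note: the 8-bit immediate is a truth table indexed by the bit triple
// (dst, src2, src3): bit k of $func is the result when the three input bits
// spell the value k. For example func 0x96 computes dst ^ src2 ^ src3, and 0xE8
// the bitwise majority of the three inputs.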

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
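
// Editor's note: rotation is per lane and keeps the lane width; rotating an int
// lane left by s computes (x << s) | (x >>> (32 - s)), e.g.
// rotl(0x12345678, 8) == 0x34567812.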

// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked load" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
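
// Editor's note (assumed semantics of the helpers): the boolean passed to
// evmovdqu is the merge flag, so the masked load uses zero-masking (false:
// unselected lanes become zero) while the masked store merges (true: unselected
// lanes in memory are left untouched).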

#ifdef _LP64
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}
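
// Editor's example: to check 64-byte alignment $mask would be 0x3F; testq sets
// ZF only when all masked address bits are zero, so any misaligned access falls
// through to the stop().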

instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}
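
// Editor's note: kortest sets CF when the OR of its operands is all ones, so the
// branch above returns -1 exactly when every unmasked lane compared equal;
// otherwise not + tzcnt converts the equality mask into the index of the first
// mismatching lane.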

instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}
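
// Editor's example: for a constant $len of 3 the immediate variant computes
// 0xFFFFFFFFFFFFFFFF >> (64 - 3) == 0x7, i.e. the low three mask bits set.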

instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
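
// Editor's note: the rules above all funnel into vector_mask_operation, which
// dispatches on the ideal opcode: tolong packs mask lane i into result bit i,
// truecount is a population count of that packed value, and first/last-true
// reduce to forward/backward bit scans over it.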

// --------------------------------- Compress/Expand Operations ---------------------------
#ifdef _LP64
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t! using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64
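
// Editor's note: compress gathers the lanes whose mask bit is set into
// consecutive low lanes (e.g. src [a, b, c, d] with mask 0b0101 yields
// [a, c, 0, 0]), while expand performs the inverse scatter; CompressM compresses
// the mask register itself.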

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
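
// Editor's note: 0x8040201008040201 encodes an 8x8 bit matrix, one byte per row;
// GF2P8AFFINEQB applies that matrix to every byte of the source, and with this
// particular matrix the affine transform reverses the bit order within each byte.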

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
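
// Editor's note: per-lane leading zero count matching
// Integer/Long.numberOfLeadingZeros, e.g. an int lane holding 1 yields 31 and a
// zero lane yields the full lane width (32).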

// ---------------------------------- Vector Masked Operations ------------------------------------

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
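
// Editor's note: evmasked_op with merge == true implements merge-masking:
// dst[i] = mask[i] ? (dst[i] op src2[i]) : dst[i], so lanes with a clear mask
// bit keep their previous destination value.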

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison dispatched on the element type of the first source vector
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
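
// Editor's note: the immI8 condition is a BoolTest constant; the helpers above
// translate it to an AVX-512 comparison predicate, and for the integral cases
// the signedness flag passed to evpcmp* is the negation of
// is_unsigned_booltest_pred.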

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
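
// Editor's note: cnt is fixed at -1 here, so MaskAll(cnt) is the all-ones mask
// and XorVMask(src, MaskAll(-1)) is simply the bitwise NOT of src, which both
// rules implement with knot.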
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif
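// Bitwise logic on opmask registers (AndVMask/OrVMask/XorVMask). Illustrative
// shapes of the instructions selected below (register choices hypothetical):
//   kandw k1, k2, k3
//   kxorw k1, k2, k3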
instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "mask types must match");
    uint masklen = Matcher::vector_length(this);
    // Without AVX512DQ there are no byte-granular k-ops, so widen short masks to 16 bits.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
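// CastVV only adjusts the type information the optimizer sees; the value is
// already in a register of the right class, so the casts below are zero-size,
// zero-cost placeholders that emit no code.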
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
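// VFPCLASS immediate bit assignments, for reference: 0x01 QNaN, 0x02 +0,
// 0x04 -0, 0x08 +Inf, 0x10 -Inf, 0x20 denormal, 0x40 finite negative,
// 0x80 SNaN. The 0x18 immediate above therefore tests +Inf | -Inf, which is
// exactly the IsInfinite predicate.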