1 // 2 // Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case 4: // fall-through
    case 8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 
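  // Sketch of the trick used below (illustrative): the call pushes the address of
  // "next", i.e. the_pc plus the size of the call instruction, and the subptr then
  // subtracts exactly the number of bytes emitted since the handler start, so the
  // stack slot ends up holding "the_pc" without clobbering any register.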
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_AddHF: 1465 case Op_DivHF: 1466 case Op_FmaHF: 1467 case Op_MaxHF: 1468 case Op_MinHF: 1469 case Op_MulHF: 1470 case Op_ReinterpretS2HF: 1471 case Op_ReinterpretHF2S: 1472 case Op_SubHF: 1473 case Op_SqrtHF: 1474 if (!VM_Version::supports_avx512_fp16()) { 1475 return false; 1476 } 1477 break; 1478 case Op_VectorLoadShuffle: 1479 case Op_VectorRearrange: 1480 case Op_MulReductionVI: 1481 if (UseSSE < 4) { // requires at least SSE4 1482 return false; 1483 } 1484 break; 1485 case Op_IsInfiniteF: 1486 case Op_IsInfiniteD: 1487 if (!VM_Version::supports_avx512dq()) { 1488 return false; 1489 } 1490 break; 1491 case Op_SqrtVD: 1492 case Op_SqrtVF: 1493 case Op_VectorMaskCmp: 1494 case Op_VectorCastB2X: 1495 case Op_VectorCastS2X: 1496 case Op_VectorCastI2X: 1497 case Op_VectorCastL2X: 1498 case Op_VectorCastF2X: 1499 case Op_VectorCastD2X: 1500 case Op_VectorUCastB2X: 1501 case Op_VectorUCastS2X: 1502 case Op_VectorUCastI2X: 1503 case Op_VectorMaskCast: 1504 if (UseAVX < 1) { // enabled for AVX only 1505 return false; 1506 } 1507 break; 1508 case Op_PopulateIndex: 1509 if (!is_LP64 || (UseAVX < 2)) { 1510 return false; 1511 } 1512 break; 1513 case Op_RoundVF: 1514 if (UseAVX < 2) { // enabled for AVX2 only 1515 return false; 1516 } 1517 break; 1518 case Op_RoundVD: 1519 if (UseAVX < 3) { 1520 return false; // enabled for AVX3 only 1521 } 1522 break; 1523 case Op_CompareAndSwapL: 1524 #ifdef _LP64 1525 case Op_CompareAndSwapP: 1526 #endif 1527 break; 1528 case Op_StrIndexOf: 1529 if (!UseSSE42Intrinsics) { 1530 return false; 1531 } 1532 break; 1533 case Op_StrIndexOfChar: 1534 if (!UseSSE42Intrinsics) { 1535 return false; 1536 } 1537 break; 1538 case Op_OnSpinWait: 1539 if (VM_Version::supports_on_spin_wait() == false) { 1540 return false; 1541 } 1542 break; 1543 case Op_MulVB: 1544 case Op_LShiftVB: 1545 case Op_RShiftVB: 1546 case Op_URShiftVB: 1547 case Op_VectorInsert: 1548 case Op_VectorLoadMask: 1549 case Op_VectorStoreMask: 1550 case Op_VectorBlend: 1551 if (UseSSE < 4) { 1552 return false; 1553 } 1554 break; 1555 #ifdef _LP64 1556 case Op_MaxD: 1557 case Op_MaxF: 1558 case Op_MinD: 1559 case Op_MinF: 1560 if (UseAVX < 1) { // enabled for AVX only 1561 return false; 1562 } 1563 break; 1564 #endif 1565 case Op_CacheWB: 1566 case Op_CacheWBPreSync: 1567 case 
Op_CacheWBPostSync: 1568 if (!VM_Version::supports_data_cache_line_flush()) { 1569 return false; 1570 } 1571 break; 1572 case Op_ExtractB: 1573 case Op_ExtractL: 1574 case Op_ExtractI: 1575 case Op_RoundDoubleMode: 1576 if (UseSSE < 4) { 1577 return false; 1578 } 1579 break; 1580 case Op_RoundDoubleModeV: 1581 if (VM_Version::supports_avx() == false) { 1582 return false; // 128bit vroundpd is not available 1583 } 1584 break; 1585 case Op_LoadVectorGather: 1586 case Op_LoadVectorGatherMasked: 1587 if (UseAVX < 2) { 1588 return false; 1589 } 1590 break; 1591 case Op_FmaF: 1592 case Op_FmaD: 1593 case Op_FmaVD: 1594 case Op_FmaVF: 1595 if (!UseFMA) { 1596 return false; 1597 } 1598 break; 1599 case Op_MacroLogicV: 1600 if (UseAVX < 3 || !UseVectorMacroLogic) { 1601 return false; 1602 } 1603 break; 1604 1605 case Op_VectorCmpMasked: 1606 case Op_VectorMaskGen: 1607 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1608 return false; 1609 } 1610 break; 1611 case Op_VectorMaskFirstTrue: 1612 case Op_VectorMaskLastTrue: 1613 case Op_VectorMaskTrueCount: 1614 case Op_VectorMaskToLong: 1615 if (!is_LP64 || UseAVX < 1) { 1616 return false; 1617 } 1618 break; 1619 case Op_RoundF: 1620 case Op_RoundD: 1621 if (!is_LP64) { 1622 return false; 1623 } 1624 break; 1625 case Op_CopySignD: 1626 case Op_CopySignF: 1627 if (UseAVX < 3 || !is_LP64) { 1628 return false; 1629 } 1630 if (!VM_Version::supports_avx512vl()) { 1631 return false; 1632 } 1633 break; 1634 #ifndef _LP64 1635 case Op_AddReductionVF: 1636 case Op_AddReductionVD: 1637 case Op_MulReductionVF: 1638 case Op_MulReductionVD: 1639 if (UseSSE < 1) { // requires at least SSE 1640 return false; 1641 } 1642 break; 1643 case Op_MulAddVS2VI: 1644 case Op_RShiftVL: 1645 case Op_AbsVD: 1646 case Op_NegVD: 1647 if (UseSSE < 2) { 1648 return false; 1649 } 1650 break; 1651 #endif // !LP64 1652 case Op_CompressBits: 1653 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1654 return false; 1655 } 1656 break; 1657 case Op_ExpandBits: 1658 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1659 return false; 1660 } 1661 break; 1662 case Op_SignumF: 1663 if (UseSSE < 1) { 1664 return false; 1665 } 1666 break; 1667 case Op_SignumD: 1668 if (UseSSE < 2) { 1669 return false; 1670 } 1671 break; 1672 case Op_CompressM: 1673 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1674 return false; 1675 } 1676 break; 1677 case Op_SqrtF: 1678 if (UseSSE < 1) { 1679 return false; 1680 } 1681 break; 1682 case Op_SqrtD: 1683 #ifdef _LP64 1684 if (UseSSE < 2) { 1685 return false; 1686 } 1687 #else 1688 // x86_32.ad has a special match rule for SqrtD. 1689 // Together with common x86 rules, this handles all UseSSE cases. 1690 #endif 1691 break; 1692 case Op_ConvF2HF: 1693 case Op_ConvHF2F: 1694 if (!VM_Version::supports_float16()) { 1695 return false; 1696 } 1697 break; 1698 case Op_VectorCastF2HF: 1699 case Op_VectorCastHF2F: 1700 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1701 return false; 1702 } 1703 break; 1704 } 1705 return true; // Match rules are supported by default. 
1706 } 1707 1708 //------------------------------------------------------------------------ 1709 1710 static inline bool is_pop_count_instr_target(BasicType bt) { 1711 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1712 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1713 } 1714 1715 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1716 return match_rule_supported_vector(opcode, vlen, bt); 1717 } 1718 1719 // Identify extra cases that we might want to provide match rules for vector nodes and 1720 // other intrinsics guarded with vector length (vlen) and element type (bt). 1721 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1722 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1723 if (!match_rule_supported(opcode)) { 1724 return false; 1725 } 1726 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1727 // * SSE2 supports 128bit vectors for all types; 1728 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1729 // * AVX2 supports 256bit vectors for all types; 1730 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1731 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1732 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1733 // And MaxVectorSize is taken into account as well. 1734 if (!vector_size_supported(bt, vlen)) { 1735 return false; 1736 } 1737 // Special cases which require vector length follow: 1738 // * implementation limitations 1739 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1740 // * 128bit vroundpd instruction is present only in AVX1 1741 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1742 switch (opcode) { 1743 case Op_AbsVF: 1744 case Op_NegVF: 1745 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1746 return false; // 512bit vandps and vxorps are not available 1747 } 1748 break; 1749 case Op_AbsVD: 1750 case Op_NegVD: 1751 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1752 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1753 } 1754 break; 1755 case Op_RotateRightV: 1756 case Op_RotateLeftV: 1757 if (bt != T_INT && bt != T_LONG) { 1758 return false; 1759 } // fallthrough 1760 case Op_MacroLogicV: 1761 if (!VM_Version::supports_evex() || 1762 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1763 return false; 1764 } 1765 break; 1766 case Op_ClearArray: 1767 case Op_VectorMaskGen: 1768 case Op_VectorCmpMasked: 1769 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1770 return false; 1771 } 1772 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1773 return false; 1774 } 1775 break; 1776 case Op_LoadVectorMasked: 1777 case Op_StoreVectorMasked: 1778 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1779 return false; 1780 } 1781 break; 1782 case Op_UMinV: 1783 case Op_UMaxV: 1784 if (UseAVX == 0) { 1785 return false; 1786 } 1787 break; 1788 case Op_MaxV: 1789 case Op_MinV: 1790 if (UseSSE < 4 && is_integral_type(bt)) { 1791 return false; 1792 } 1793 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1794 // Float/Double intrinsics are enabled for AVX family currently. 
1795 if (UseAVX == 0) { 1796 return false; 1797 } 1798 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1799 return false; 1800 } 1801 } 1802 break; 1803 case Op_CallLeafVector: 1804 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1805 return false; 1806 } 1807 break; 1808 case Op_AddReductionVI: 1809 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1810 return false; 1811 } 1812 // fallthrough 1813 case Op_AndReductionV: 1814 case Op_OrReductionV: 1815 case Op_XorReductionV: 1816 if (is_subword_type(bt) && (UseSSE < 4)) { 1817 return false; 1818 } 1819 #ifndef _LP64 1820 if (bt == T_BYTE || bt == T_LONG) { 1821 return false; 1822 } 1823 #endif 1824 break; 1825 #ifndef _LP64 1826 case Op_VectorInsert: 1827 if (bt == T_LONG || bt == T_DOUBLE) { 1828 return false; 1829 } 1830 break; 1831 #endif 1832 case Op_MinReductionV: 1833 case Op_MaxReductionV: 1834 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1835 return false; 1836 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1837 return false; 1838 } 1839 // Float/Double intrinsics enabled for AVX family. 1840 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1841 return false; 1842 } 1843 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1844 return false; 1845 } 1846 #ifndef _LP64 1847 if (bt == T_BYTE || bt == T_LONG) { 1848 return false; 1849 } 1850 #endif 1851 break; 1852 case Op_VectorTest: 1853 if (UseSSE < 4) { 1854 return false; // Implementation limitation 1855 } else if (size_in_bits < 32) { 1856 return false; // Implementation limitation 1857 } 1858 break; 1859 case Op_VectorLoadShuffle: 1860 case Op_VectorRearrange: 1861 if(vlen == 2) { 1862 return false; // Implementation limitation due to how shuffle is loaded 1863 } else if (size_in_bits == 256 && UseAVX < 2) { 1864 return false; // Implementation limitation 1865 } 1866 break; 1867 case Op_VectorLoadMask: 1868 case Op_VectorMaskCast: 1869 if (size_in_bits == 256 && UseAVX < 2) { 1870 return false; // Implementation limitation 1871 } 1872 // fallthrough 1873 case Op_VectorStoreMask: 1874 if (vlen == 2) { 1875 return false; // Implementation limitation 1876 } 1877 break; 1878 case Op_PopulateIndex: 1879 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1880 return false; 1881 } 1882 break; 1883 case Op_VectorCastB2X: 1884 case Op_VectorCastS2X: 1885 case Op_VectorCastI2X: 1886 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1887 return false; 1888 } 1889 break; 1890 case Op_VectorCastL2X: 1891 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1892 return false; 1893 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1894 return false; 1895 } 1896 break; 1897 case Op_VectorCastF2X: { 1898 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1899 // happen after intermediate conversion to integer and special handling 1900 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
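      // The cast source is always a float vector, so its size below is derived from
      // T_FLOAT rather than from the destination element type 'bt'.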
1901 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1902 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1903 return false; 1904 } 1905 } 1906 // fallthrough 1907 case Op_VectorCastD2X: 1908 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1909 return false; 1910 } 1911 break; 1912 case Op_VectorCastF2HF: 1913 case Op_VectorCastHF2F: 1914 if (!VM_Version::supports_f16c() && 1915 ((!VM_Version::supports_evex() || 1916 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1917 return false; 1918 } 1919 break; 1920 case Op_RoundVD: 1921 if (!VM_Version::supports_avx512dq()) { 1922 return false; 1923 } 1924 break; 1925 case Op_MulReductionVI: 1926 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1927 return false; 1928 } 1929 break; 1930 case Op_LoadVectorGatherMasked: 1931 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1932 return false; 1933 } 1934 if (is_subword_type(bt) && 1935 (!is_LP64 || 1936 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1937 (size_in_bits < 64) || 1938 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1939 return false; 1940 } 1941 break; 1942 case Op_StoreVectorScatterMasked: 1943 case Op_StoreVectorScatter: 1944 if (is_subword_type(bt)) { 1945 return false; 1946 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1947 return false; 1948 } 1949 // fallthrough 1950 case Op_LoadVectorGather: 1951 if (!is_subword_type(bt) && size_in_bits == 64) { 1952 return false; 1953 } 1954 if (is_subword_type(bt) && size_in_bits < 64) { 1955 return false; 1956 } 1957 break; 1958 case Op_SaturatingAddV: 1959 case Op_SaturatingSubV: 1960 if (UseAVX < 1) { 1961 return false; // Implementation limitation 1962 } 1963 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1964 return false; 1965 } 1966 break; 1967 case Op_SelectFromTwoVector: 1968 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1969 return false; 1970 } 1971 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1972 return false; 1973 } 1974 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1975 return false; 1976 } 1977 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1978 return false; 1979 } 1980 break; 1981 case Op_MaskAll: 1982 if (!VM_Version::supports_evex()) { 1983 return false; 1984 } 1985 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1986 return false; 1987 } 1988 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1989 return false; 1990 } 1991 break; 1992 case Op_VectorMaskCmp: 1993 if (vlen < 2 || size_in_bits < 32) { 1994 return false; 1995 } 1996 break; 1997 case Op_CompressM: 1998 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1999 return false; 2000 } 2001 break; 2002 case Op_CompressV: 2003 case Op_ExpandV: 2004 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 2005 return false; 2006 } 2007 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 2008 return false; 2009 } 2010 if (size_in_bits < 128 ) { 2011 return false; 2012 } 2013 case Op_VectorLongToMask: 2014 if (UseAVX < 1 || !is_LP64) { 2015 return false; 2016 } 2017 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 2018 return false; 2019 } 2020 break; 2021 case Op_SignumVD: 2022 case Op_SignumVF: 2023 if (UseAVX < 1) { 2024 return false; 2025 } 2026 break; 2027 case Op_PopCountVI: 2028 
case Op_PopCountVL: { 2029 if (!is_pop_count_instr_target(bt) && 2030 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2031 return false; 2032 } 2033 } 2034 break; 2035 case Op_ReverseV: 2036 case Op_ReverseBytesV: 2037 if (UseAVX < 2) { 2038 return false; 2039 } 2040 break; 2041 case Op_CountTrailingZerosV: 2042 case Op_CountLeadingZerosV: 2043 if (UseAVX < 2) { 2044 return false; 2045 } 2046 break; 2047 } 2048 return true; // Per default match rules are supported. 2049 } 2050 2051 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2052 // ADLC based match_rule_supported routine checks for the existence of pattern based 2053 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2054 // of their non-masked counterpart with mask edge being the differentiator. 2055 // This routine does a strict check on the existence of masked operation patterns 2056 // by returning a default false value for all the other opcodes apart from the 2057 // ones whose masked instruction patterns are defined in this file. 2058 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2059 return false; 2060 } 2061 2062 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2063 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2064 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2065 return false; 2066 } 2067 switch(opcode) { 2068 // Unary masked operations 2069 case Op_AbsVB: 2070 case Op_AbsVS: 2071 if(!VM_Version::supports_avx512bw()) { 2072 return false; // Implementation limitation 2073 } 2074 case Op_AbsVI: 2075 case Op_AbsVL: 2076 return true; 2077 2078 // Ternary masked operations 2079 case Op_FmaVF: 2080 case Op_FmaVD: 2081 return true; 2082 2083 case Op_MacroLogicV: 2084 if(bt != T_INT && bt != T_LONG) { 2085 return false; 2086 } 2087 return true; 2088 2089 // Binary masked operations 2090 case Op_AddVB: 2091 case Op_AddVS: 2092 case Op_SubVB: 2093 case Op_SubVS: 2094 case Op_MulVS: 2095 case Op_LShiftVS: 2096 case Op_RShiftVS: 2097 case Op_URShiftVS: 2098 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2099 if (!VM_Version::supports_avx512bw()) { 2100 return false; // Implementation limitation 2101 } 2102 return true; 2103 2104 case Op_MulVL: 2105 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2106 if (!VM_Version::supports_avx512dq()) { 2107 return false; // Implementation limitation 2108 } 2109 return true; 2110 2111 case Op_AndV: 2112 case Op_OrV: 2113 case Op_XorV: 2114 case Op_RotateRightV: 2115 case Op_RotateLeftV: 2116 if (bt != T_INT && bt != T_LONG) { 2117 return false; // Implementation limitation 2118 } 2119 return true; 2120 2121 case Op_VectorLoadMask: 2122 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2123 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2124 return false; 2125 } 2126 return true; 2127 2128 case Op_AddVI: 2129 case Op_AddVL: 2130 case Op_AddVF: 2131 case Op_AddVD: 2132 case Op_SubVI: 2133 case Op_SubVL: 2134 case Op_SubVF: 2135 case Op_SubVD: 2136 case Op_MulVI: 2137 case Op_MulVF: 2138 case Op_MulVD: 2139 case Op_DivVF: 2140 case Op_DivVD: 2141 case Op_SqrtVF: 2142 case Op_SqrtVD: 2143 case Op_LShiftVI: 2144 case Op_LShiftVL: 2145 case Op_RShiftVI: 2146 case Op_RShiftVL: 2147 case Op_URShiftVI: 2148 case Op_URShiftVL: 2149 case Op_LoadVectorMasked: 2150 case Op_StoreVectorMasked: 2151 case Op_LoadVectorGatherMasked: 2152 case Op_StoreVectorScatterMasked: 2153 return true; 2154 2155 case Op_UMinV: 
2156 case Op_UMaxV: 2157 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2158 return false; 2159 } // fallthrough 2160 case Op_MaxV: 2161 case Op_MinV: 2162 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2163 return false; // Implementation limitation 2164 } 2165 if (is_floating_point_type(bt)) { 2166 return false; // Implementation limitation 2167 } 2168 return true; 2169 case Op_SaturatingAddV: 2170 case Op_SaturatingSubV: 2171 if (!is_subword_type(bt)) { 2172 return false; 2173 } 2174 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2175 return false; // Implementation limitation 2176 } 2177 return true; 2178 2179 case Op_VectorMaskCmp: 2180 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2181 return false; // Implementation limitation 2182 } 2183 return true; 2184 2185 case Op_VectorRearrange: 2186 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2187 return false; // Implementation limitation 2188 } 2189 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2190 return false; // Implementation limitation 2191 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2192 return false; // Implementation limitation 2193 } 2194 return true; 2195 2196 // Binary Logical operations 2197 case Op_AndVMask: 2198 case Op_OrVMask: 2199 case Op_XorVMask: 2200 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2201 return false; // Implementation limitation 2202 } 2203 return true; 2204 2205 case Op_PopCountVI: 2206 case Op_PopCountVL: 2207 if (!is_pop_count_instr_target(bt)) { 2208 return false; 2209 } 2210 return true; 2211 2212 case Op_MaskAll: 2213 return true; 2214 2215 case Op_CountLeadingZerosV: 2216 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2217 return true; 2218 } 2219 default: 2220 return false; 2221 } 2222 } 2223 2224 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2225 return false; 2226 } 2227 2228 // Return true if Vector::rearrange needs preparation of the shuffle argument 2229 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2230 switch (elem_bt) { 2231 case T_BYTE: return false; 2232 case T_SHORT: return !VM_Version::supports_avx512bw(); 2233 case T_INT: return !VM_Version::supports_avx(); 2234 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2235 default: 2236 ShouldNotReachHere(); 2237 return false; 2238 } 2239 } 2240 2241 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2242 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2243 bool legacy = (generic_opnd->opcode() == LEGVEC); 2244 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2245 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2246 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
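    // legVecZ maps to the legacy register class (xmm0-xmm15 only), which remains
    // usable without the AVX512VL/BW/DQ extensions.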
2247 return new legVecZOper(); 2248 } 2249 if (legacy) { 2250 switch (ideal_reg) { 2251 case Op_VecS: return new legVecSOper(); 2252 case Op_VecD: return new legVecDOper(); 2253 case Op_VecX: return new legVecXOper(); 2254 case Op_VecY: return new legVecYOper(); 2255 case Op_VecZ: return new legVecZOper(); 2256 } 2257 } else { 2258 switch (ideal_reg) { 2259 case Op_VecS: return new vecSOper(); 2260 case Op_VecD: return new vecDOper(); 2261 case Op_VecX: return new vecXOper(); 2262 case Op_VecY: return new vecYOper(); 2263 case Op_VecZ: return new vecZOper(); 2264 } 2265 } 2266 ShouldNotReachHere(); 2267 return nullptr; 2268 } 2269 2270 bool Matcher::is_reg2reg_move(MachNode* m) { 2271 switch (m->rule()) { 2272 case MoveVec2Leg_rule: 2273 case MoveLeg2Vec_rule: 2274 case MoveF2VL_rule: 2275 case MoveF2LEG_rule: 2276 case MoveVL2F_rule: 2277 case MoveLEG2F_rule: 2278 case MoveD2VL_rule: 2279 case MoveD2LEG_rule: 2280 case MoveVL2D_rule: 2281 case MoveLEG2D_rule: 2282 return true; 2283 default: 2284 return false; 2285 } 2286 } 2287 2288 bool Matcher::is_generic_vector(MachOper* opnd) { 2289 switch (opnd->opcode()) { 2290 case VEC: 2291 case LEGVEC: 2292 return true; 2293 default: 2294 return false; 2295 } 2296 } 2297 2298 //------------------------------------------------------------------------ 2299 2300 const RegMask* Matcher::predicate_reg_mask(void) { 2301 return &_VECTMASK_REG_mask; 2302 } 2303 2304 // Max vector size in bytes. 0 if not supported. 2305 int Matcher::vector_width_in_bytes(BasicType bt) { 2306 assert(is_java_primitive(bt), "only primitive type vectors"); 2307 if (UseSSE < 2) return 0; 2308 // SSE2 supports 128bit vectors for all types. 2309 // AVX2 supports 256bit vectors for all types. 2310 // AVX2/EVEX supports 512bit vectors for all types. 2311 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2312 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2313 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2314 size = (UseAVX > 2) ? 64 : 32; 2315 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2316 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2317 // Use flag to limit vector size. 2318 size = MIN2(size,(int)MaxVectorSize); 2319 // Minimum 2 values in vector (or 4 for bytes). 2320 switch (bt) { 2321 case T_DOUBLE: 2322 case T_LONG: 2323 if (size < 16) return 0; 2324 break; 2325 case T_FLOAT: 2326 case T_INT: 2327 if (size < 8) return 0; 2328 break; 2329 case T_BOOLEAN: 2330 if (size < 4) return 0; 2331 break; 2332 case T_CHAR: 2333 if (size < 4) return 0; 2334 break; 2335 case T_BYTE: 2336 if (size < 4) return 0; 2337 break; 2338 case T_SHORT: 2339 if (size < 4) return 0; 2340 break; 2341 default: 2342 ShouldNotReachHere(); 2343 } 2344 return size; 2345 } 2346 2347 // Limits on vector size (number of elements) loaded into vector. 2348 int Matcher::max_vector_size(const BasicType bt) { 2349 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2350 } 2351 int Matcher::min_vector_size(const BasicType bt) { 2352 int max_size = max_vector_size(bt); 2353 // Min size which can be loaded into vector is 4 bytes. 2354 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2355 // Support for calling svml double64 vectors 2356 if (bt == T_DOUBLE) { 2357 size = 1; 2358 } 2359 return MIN2(size,max_size); 2360 } 2361 2362 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2363 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2364 // by default on Cascade Lake 2365 if (VM_Version::is_default_intel_cascade_lake()) { 2366 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2367 } 2368 return Matcher::max_vector_size(bt); 2369 } 2370 2371 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2372 return -1; 2373 } 2374 2375 // Vector ideal reg corresponding to specified size in bytes 2376 uint Matcher::vector_ideal_reg(int size) { 2377 assert(MaxVectorSize >= size, ""); 2378 switch(size) { 2379 case 4: return Op_VecS; 2380 case 8: return Op_VecD; 2381 case 16: return Op_VecX; 2382 case 32: return Op_VecY; 2383 case 64: return Op_VecZ; 2384 } 2385 ShouldNotReachHere(); 2386 return 0; 2387 } 2388 2389 // Check for shift by small constant as well 2390 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2391 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2392 shift->in(2)->get_int() <= 3 && 2393 // Are there other uses besides address expressions? 2394 !matcher->is_visited(shift)) { 2395 address_visited.set(shift->_idx); // Flag as address_visited 2396 mstack.push(shift->in(2), Matcher::Visit); 2397 Node *conv = shift->in(1); 2398 #ifdef _LP64 2399 // Allow Matcher to match the rule which bypass 2400 // ConvI2L operation for an array index on LP64 2401 // if the index value is positive. 2402 if (conv->Opcode() == Op_ConvI2L && 2403 conv->as_Type()->type()->is_long()->_lo >= 0 && 2404 // Are there other uses besides address expressions? 2405 !matcher->is_visited(conv)) { 2406 address_visited.set(conv->_idx); // Flag as address_visited 2407 mstack.push(conv->in(1), Matcher::Pre_Visit); 2408 } else 2409 #endif 2410 mstack.push(conv, Matcher::Pre_Visit); 2411 return true; 2412 } 2413 return false; 2414 } 2415 2416 // This function identifies sub-graphs in which a 'load' node is 2417 // input to two different nodes, and such that it can be matched 2418 // with BMI instructions like blsi, blsr, etc. 2419 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2420 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2421 // refers to the same node. 2422 // 2423 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2424 // This is a temporary solution until we make DAGs expressible in ADL. 
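// Illustrative use (see is_bmi_pattern() below): for the blsi idiom the matcher is
// invoked roughly as
//   FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
//   bmii.match(Op_AndI, -1, Op_SubI, 1, 0);
// i.e. op1 is a commutative AndI (index -1), op2 is a SubI whose constant 0 sits at
// input 1, and 'm' is the shared load.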
2425 template<typename ConType> 2426 class FusedPatternMatcher { 2427 Node* _op1_node; 2428 Node* _mop_node; 2429 int _con_op; 2430 2431 static int match_next(Node* n, int next_op, int next_op_idx) { 2432 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2433 return -1; 2434 } 2435 2436 if (next_op_idx == -1) { // n is commutative, try rotations 2437 if (n->in(1)->Opcode() == next_op) { 2438 return 1; 2439 } else if (n->in(2)->Opcode() == next_op) { 2440 return 2; 2441 } 2442 } else { 2443 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2444 if (n->in(next_op_idx)->Opcode() == next_op) { 2445 return next_op_idx; 2446 } 2447 } 2448 return -1; 2449 } 2450 2451 public: 2452 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2453 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2454 2455 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2456 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2457 typename ConType::NativeType con_value) { 2458 if (_op1_node->Opcode() != op1) { 2459 return false; 2460 } 2461 if (_mop_node->outcnt() > 2) { 2462 return false; 2463 } 2464 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2465 if (op1_op2_idx == -1) { 2466 return false; 2467 } 2468 // Memory operation must be the other edge 2469 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2470 2471 // Check that the mop node is really what we want 2472 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2473 Node* op2_node = _op1_node->in(op1_op2_idx); 2474 if (op2_node->outcnt() > 1) { 2475 return false; 2476 } 2477 assert(op2_node->Opcode() == op2, "Should be"); 2478 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2479 if (op2_con_idx == -1) { 2480 return false; 2481 } 2482 // Memory operation must be the other edge 2483 int op2_mop_idx = (op2_con_idx & 1) + 1; 2484 // Check that the memory operation is the same node 2485 if (op2_node->in(op2_mop_idx) == _mop_node) { 2486 // Now check the constant 2487 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2488 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2489 return true; 2490 } 2491 } 2492 } 2493 return false; 2494 } 2495 }; 2496 2497 static bool is_bmi_pattern(Node* n, Node* m) { 2498 assert(UseBMI1Instructions, "sanity"); 2499 if (n != nullptr && m != nullptr) { 2500 if (m->Opcode() == Op_LoadI) { 2501 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2502 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2503 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2504 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2505 } else if (m->Opcode() == Op_LoadL) { 2506 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2507 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2508 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2509 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2510 } 2511 } 2512 return false; 2513 } 2514 2515 // Should the matcher clone input 'm' of node 'n'? 2516 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2517 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
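  // Cloning the shared load lets the BMI match rules fold it in as a memory operand
  // (e.g. blsi r32, m32) instead of forcing the loaded value into a register first.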
2518 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2519 mstack.push(m, Visit); 2520 return true; 2521 } 2522 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2523 mstack.push(m, Visit); // m = ShiftCntV 2524 return true; 2525 } 2526 if (is_encode_and_store_pattern(n, m)) { 2527 mstack.push(m, Visit); 2528 return true; 2529 } 2530 return false; 2531 } 2532 2533 // Should the Matcher clone shifts on addressing modes, expecting them 2534 // to be subsumed into complex addressing expressions or compute them 2535 // into registers? 2536 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2537 Node *off = m->in(AddPNode::Offset); 2538 if (off->is_Con()) { 2539 address_visited.test_set(m->_idx); // Flag as address_visited 2540 Node *adr = m->in(AddPNode::Address); 2541 2542 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2543 // AtomicAdd is not an addressing expression. 2544 // Cheap to find it by looking for screwy base. 2545 if (adr->is_AddP() && 2546 !adr->in(AddPNode::Base)->is_top() && 2547 !adr->in(AddPNode::Offset)->is_Con() && 2548 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2549 // Are there other uses besides address expressions? 2550 !is_visited(adr)) { 2551 address_visited.set(adr->_idx); // Flag as address_visited 2552 Node *shift = adr->in(AddPNode::Offset); 2553 if (!clone_shift(shift, this, mstack, address_visited)) { 2554 mstack.push(shift, Pre_Visit); 2555 } 2556 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2557 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2558 } else { 2559 mstack.push(adr, Pre_Visit); 2560 } 2561 2562 // Clone X+offset as it also folds into most addressing expressions 2563 mstack.push(off, Visit); 2564 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2565 return true; 2566 } else if (clone_shift(off, this, mstack, address_visited)) { 2567 address_visited.test_set(m->_idx); // Flag as address_visited 2568 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2569 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2570 return true; 2571 } 2572 return false; 2573 } 2574 2575 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2576 switch (bt) { 2577 case BoolTest::eq: 2578 return Assembler::eq; 2579 case BoolTest::ne: 2580 return Assembler::neq; 2581 case BoolTest::le: 2582 case BoolTest::ule: 2583 return Assembler::le; 2584 case BoolTest::ge: 2585 case BoolTest::uge: 2586 return Assembler::nlt; 2587 case BoolTest::lt: 2588 case BoolTest::ult: 2589 return Assembler::lt; 2590 case BoolTest::gt: 2591 case BoolTest::ugt: 2592 return Assembler::nle; 2593 default : ShouldNotReachHere(); return Assembler::_false; 2594 } 2595 } 2596 2597 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2598 switch (bt) { 2599 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2600 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2601 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2602 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2603 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2604 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2605 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2606 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2607 } 2608 } 2609 2610 // Helper methods for MachSpillCopyNode::implementation(). 2611 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2612 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2613 assert(ireg == Op_VecS || // 32bit vector 2614 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2615 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2616 "no non-adjacent vector moves" ); 2617 if (masm) { 2618 switch (ireg) { 2619 case Op_VecS: // copy whole register 2620 case Op_VecD: 2621 case Op_VecX: 2622 #ifndef _LP64 2623 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2624 #else 2625 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2626 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2627 } else { 2628 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2629 } 2630 #endif 2631 break; 2632 case Op_VecY: 2633 #ifndef _LP64 2634 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2635 #else 2636 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2637 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2638 } else { 2639 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2640 } 2641 #endif 2642 break; 2643 case Op_VecZ: 2644 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2645 break; 2646 default: 2647 ShouldNotReachHere(); 2648 } 2649 #ifndef PRODUCT 2650 } else { 2651 switch (ireg) { 2652 case Op_VecS: 2653 case Op_VecD: 2654 case Op_VecX: 2655 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2656 break; 2657 case Op_VecY: 2658 case Op_VecZ: 2659 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2660 break; 2661 default: 2662 ShouldNotReachHere(); 2663 } 2664 #endif 2665 } 2666 } 2667 2668 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2669 int stack_offset, int reg, uint ireg, outputStream* st) { 2670 if (masm) { 2671 if (is_load) { 2672 switch (ireg) { 2673 case Op_VecS: 2674 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2675 break; 2676 case Op_VecD: 2677 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2678 break; 2679 case Op_VecX: 2680 #ifndef _LP64 2681 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2682 #else 2683 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2684 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2685 } else { 2686 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2687 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2688 } 2689 
#endif 2690 break; 2691 case Op_VecY: 2692 #ifndef _LP64 2693 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2694 #else 2695 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2696 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2697 } else { 2698 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2699 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2700 } 2701 #endif 2702 break; 2703 case Op_VecZ: 2704 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2705 break; 2706 default: 2707 ShouldNotReachHere(); 2708 } 2709 } else { // store 2710 switch (ireg) { 2711 case Op_VecS: 2712 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2713 break; 2714 case Op_VecD: 2715 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2716 break; 2717 case Op_VecX: 2718 #ifndef _LP64 2719 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2720 #else 2721 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2722 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2723 } 2724 else { 2725 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2726 } 2727 #endif 2728 break; 2729 case Op_VecY: 2730 #ifndef _LP64 2731 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2732 #else 2733 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2734 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2735 } 2736 else { 2737 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2738 } 2739 #endif 2740 break; 2741 case Op_VecZ: 2742 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2743 break; 2744 default: 2745 ShouldNotReachHere(); 2746 } 2747 } 2748 #ifndef PRODUCT 2749 } else { 2750 if (is_load) { 2751 switch (ireg) { 2752 case Op_VecS: 2753 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2754 break; 2755 case Op_VecD: 2756 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2757 break; 2758 case Op_VecX: 2759 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2760 break; 2761 case Op_VecY: 2762 case Op_VecZ: 2763 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2764 break; 2765 default: 2766 ShouldNotReachHere(); 2767 } 2768 } else { // store 2769 switch (ireg) { 2770 case Op_VecS: 2771 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2772 break; 2773 case Op_VecD: 2774 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2775 break; 2776 case Op_VecX: 2777 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2778 break; 2779 case Op_VecY: 2780 case Op_VecZ: 2781 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2782 break; 2783 default: 2784 ShouldNotReachHere(); 2785 } 2786 } 2787 #endif 2788 } 2789 } 2790 2791 template <class T> 2792 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2793 int size = type2aelembytes(bt) * len; 2794 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, 
size, 0); 2795 for (int i = 0; i < len; i++) { 2796 int offset = i * type2aelembytes(bt); 2797 switch (bt) { 2798 case T_BYTE: val->at(i) = con; break; 2799 case T_SHORT: { 2800 jshort c = con; 2801 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2802 break; 2803 } 2804 case T_INT: { 2805 jint c = con; 2806 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2807 break; 2808 } 2809 case T_LONG: { 2810 jlong c = con; 2811 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2812 break; 2813 } 2814 case T_FLOAT: { 2815 jfloat c = con; 2816 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2817 break; 2818 } 2819 case T_DOUBLE: { 2820 jdouble c = con; 2821 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2822 break; 2823 } 2824 default: assert(false, "%s", type2name(bt)); 2825 } 2826 } 2827 return val; 2828 } 2829 2830 static inline jlong high_bit_set(BasicType bt) { 2831 switch (bt) { 2832 case T_BYTE: return 0x8080808080808080; 2833 case T_SHORT: return 0x8000800080008000; 2834 case T_INT: return 0x8000000080000000; 2835 case T_LONG: return 0x8000000000000000; 2836 default: 2837 ShouldNotReachHere(); 2838 return 0; 2839 } 2840 } 2841 2842 #ifndef PRODUCT 2843 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2844 st->print("nop \t# %d bytes pad for loops and calls", _count); 2845 } 2846 #endif 2847 2848 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2849 __ nop(_count); 2850 } 2851 2852 uint MachNopNode::size(PhaseRegAlloc*) const { 2853 return _count; 2854 } 2855 2856 #ifndef PRODUCT 2857 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2858 st->print("# breakpoint"); 2859 } 2860 #endif 2861 2862 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2863 __ int3(); 2864 } 2865 2866 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2867 return MachNode::size(ra_); 2868 } 2869 2870 %} 2871 2872 encode %{ 2873 2874 enc_class call_epilog %{ 2875 if (VerifyStackAtCalls) { 2876 // Check that stack depth is unchanged: find majik cookie on stack 2877 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2878 Label L; 2879 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2880 __ jccb(Assembler::equal, L); 2881 // Die if stack mismatch 2882 __ int3(); 2883 __ bind(L); 2884 } 2885 %} 2886 2887 %} 2888 2889 // Operands for bound floating pointer register arguments 2890 operand rxmm0() %{ 2891 constraint(ALLOC_IN_RC(xmm0_reg)); 2892 match(VecX); 2893 format%{%} 2894 interface(REG_INTER); 2895 %} 2896 2897 //----------OPERANDS----------------------------------------------------------- 2898 // Operand definitions must precede instruction definitions for correct parsing 2899 // in the ADLC because operands constitute user defined types which are used in 2900 // instruction definitions. 2901 2902 // Vectors 2903 2904 // Dummy generic vector class. Should be used for all vector operands. 2905 // Replaced with vec[SDXYZ] during post-selection pass. 2906 operand vec() %{ 2907 constraint(ALLOC_IN_RC(dynamic)); 2908 match(VecX); 2909 match(VecY); 2910 match(VecZ); 2911 match(VecS); 2912 match(VecD); 2913 2914 format %{ %} 2915 interface(REG_INTER); 2916 %} 2917 2918 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2919 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2920 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2921 // runtime code generation via reg_class_dynamic. 
2922 operand legVec() %{ 2923 constraint(ALLOC_IN_RC(dynamic)); 2924 match(VecX); 2925 match(VecY); 2926 match(VecZ); 2927 match(VecS); 2928 match(VecD); 2929 2930 format %{ %} 2931 interface(REG_INTER); 2932 %} 2933 2934 // Replaces vec during post-selection cleanup. See above. 2935 operand vecS() %{ 2936 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2937 match(VecS); 2938 2939 format %{ %} 2940 interface(REG_INTER); 2941 %} 2942 2943 // Replaces legVec during post-selection cleanup. See above. 2944 operand legVecS() %{ 2945 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2946 match(VecS); 2947 2948 format %{ %} 2949 interface(REG_INTER); 2950 %} 2951 2952 // Replaces vec during post-selection cleanup. See above. 2953 operand vecD() %{ 2954 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2955 match(VecD); 2956 2957 format %{ %} 2958 interface(REG_INTER); 2959 %} 2960 2961 // Replaces legVec during post-selection cleanup. See above. 2962 operand legVecD() %{ 2963 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2964 match(VecD); 2965 2966 format %{ %} 2967 interface(REG_INTER); 2968 %} 2969 2970 // Replaces vec during post-selection cleanup. See above. 2971 operand vecX() %{ 2972 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2973 match(VecX); 2974 2975 format %{ %} 2976 interface(REG_INTER); 2977 %} 2978 2979 // Replaces legVec during post-selection cleanup. See above. 2980 operand legVecX() %{ 2981 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2982 match(VecX); 2983 2984 format %{ %} 2985 interface(REG_INTER); 2986 %} 2987 2988 // Replaces vec during post-selection cleanup. See above. 2989 operand vecY() %{ 2990 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2991 match(VecY); 2992 2993 format %{ %} 2994 interface(REG_INTER); 2995 %} 2996 2997 // Replaces legVec during post-selection cleanup. See above. 2998 operand legVecY() %{ 2999 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 3000 match(VecY); 3001 3002 format %{ %} 3003 interface(REG_INTER); 3004 %} 3005 3006 // Replaces vec during post-selection cleanup. See above. 3007 operand vecZ() %{ 3008 constraint(ALLOC_IN_RC(vectorz_reg)); 3009 match(VecZ); 3010 3011 format %{ %} 3012 interface(REG_INTER); 3013 %} 3014 3015 // Replaces legVec during post-selection cleanup. See above. 
3016 operand legVecZ() %{ 3017 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 3018 match(VecZ); 3019 3020 format %{ %} 3021 interface(REG_INTER); 3022 %} 3023 3024 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 3025 3026 // ============================================================================ 3027 3028 instruct ShouldNotReachHere() %{ 3029 match(Halt); 3030 format %{ "stop\t# ShouldNotReachHere" %} 3031 ins_encode %{ 3032 if (is_reachable()) { 3033 __ stop(_halt_reason); 3034 } 3035 %} 3036 ins_pipe(pipe_slow); 3037 %} 3038 3039 // ============================================================================ 3040 3041 instruct addF_reg(regF dst, regF src) %{ 3042 predicate((UseSSE>=1) && (UseAVX == 0)); 3043 match(Set dst (AddF dst src)); 3044 3045 format %{ "addss $dst, $src" %} 3046 ins_cost(150); 3047 ins_encode %{ 3048 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3049 %} 3050 ins_pipe(pipe_slow); 3051 %} 3052 3053 instruct addF_mem(regF dst, memory src) %{ 3054 predicate((UseSSE>=1) && (UseAVX == 0)); 3055 match(Set dst (AddF dst (LoadF src))); 3056 3057 format %{ "addss $dst, $src" %} 3058 ins_cost(150); 3059 ins_encode %{ 3060 __ addss($dst$$XMMRegister, $src$$Address); 3061 %} 3062 ins_pipe(pipe_slow); 3063 %} 3064 3065 instruct addF_imm(regF dst, immF con) %{ 3066 predicate((UseSSE>=1) && (UseAVX == 0)); 3067 match(Set dst (AddF dst con)); 3068 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3069 ins_cost(150); 3070 ins_encode %{ 3071 __ addss($dst$$XMMRegister, $constantaddress($con)); 3072 %} 3073 ins_pipe(pipe_slow); 3074 %} 3075 3076 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3077 predicate(UseAVX > 0); 3078 match(Set dst (AddF src1 src2)); 3079 3080 format %{ "vaddss $dst, $src1, $src2" %} 3081 ins_cost(150); 3082 ins_encode %{ 3083 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3084 %} 3085 ins_pipe(pipe_slow); 3086 %} 3087 3088 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3089 predicate(UseAVX > 0); 3090 match(Set dst (AddF src1 (LoadF src2))); 3091 3092 format %{ "vaddss $dst, $src1, $src2" %} 3093 ins_cost(150); 3094 ins_encode %{ 3095 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3096 %} 3097 ins_pipe(pipe_slow); 3098 %} 3099 3100 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3101 predicate(UseAVX > 0); 3102 match(Set dst (AddF src con)); 3103 3104 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3105 ins_cost(150); 3106 ins_encode %{ 3107 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3108 %} 3109 ins_pipe(pipe_slow); 3110 %} 3111 3112 instruct addD_reg(regD dst, regD src) %{ 3113 predicate((UseSSE>=2) && (UseAVX == 0)); 3114 match(Set dst (AddD dst src)); 3115 3116 format %{ "addsd $dst, $src" %} 3117 ins_cost(150); 3118 ins_encode %{ 3119 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3120 %} 3121 ins_pipe(pipe_slow); 3122 %} 3123 3124 instruct addD_mem(regD dst, memory src) %{ 3125 predicate((UseSSE>=2) && (UseAVX == 0)); 3126 match(Set dst (AddD dst (LoadD src))); 3127 3128 format %{ "addsd $dst, $src" %} 3129 ins_cost(150); 3130 ins_encode %{ 3131 __ addsd($dst$$XMMRegister, $src$$Address); 3132 %} 3133 ins_pipe(pipe_slow); 3134 %} 3135 3136 instruct addD_imm(regD dst, immD con) %{ 3137 predicate((UseSSE>=2) && (UseAVX == 0)); 3138 match(Set dst (AddD dst con)); 3139 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 3140 ins_cost(150); 3141 ins_encode %{ 3142 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3143 %} 3144 ins_pipe(pipe_slow); 3145 %} 3146 3147 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3148 predicate(UseAVX > 0); 3149 match(Set dst (AddD src1 src2)); 3150 3151 format %{ "vaddsd $dst, $src1, $src2" %} 3152 ins_cost(150); 3153 ins_encode %{ 3154 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3155 %} 3156 ins_pipe(pipe_slow); 3157 %} 3158 3159 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3160 predicate(UseAVX > 0); 3161 match(Set dst (AddD src1 (LoadD src2))); 3162 3163 format %{ "vaddsd $dst, $src1, $src2" %} 3164 ins_cost(150); 3165 ins_encode %{ 3166 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3167 %} 3168 ins_pipe(pipe_slow); 3169 %} 3170 3171 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3172 predicate(UseAVX > 0); 3173 match(Set dst (AddD src con)); 3174 3175 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3176 ins_cost(150); 3177 ins_encode %{ 3178 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3179 %} 3180 ins_pipe(pipe_slow); 3181 %} 3182 3183 instruct subF_reg(regF dst, regF src) %{ 3184 predicate((UseSSE>=1) && (UseAVX == 0)); 3185 match(Set dst (SubF dst src)); 3186 3187 format %{ "subss $dst, $src" %} 3188 ins_cost(150); 3189 ins_encode %{ 3190 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3191 %} 3192 ins_pipe(pipe_slow); 3193 %} 3194 3195 instruct subF_mem(regF dst, memory src) %{ 3196 predicate((UseSSE>=1) && (UseAVX == 0)); 3197 match(Set dst (SubF dst (LoadF src))); 3198 3199 format %{ "subss $dst, $src" %} 3200 ins_cost(150); 3201 ins_encode %{ 3202 __ subss($dst$$XMMRegister, $src$$Address); 3203 %} 3204 ins_pipe(pipe_slow); 3205 %} 3206 3207 instruct subF_imm(regF dst, immF con) %{ 3208 predicate((UseSSE>=1) && (UseAVX == 0)); 3209 match(Set dst (SubF dst con)); 3210 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3211 ins_cost(150); 3212 ins_encode %{ 3213 __ subss($dst$$XMMRegister, $constantaddress($con)); 3214 %} 3215 ins_pipe(pipe_slow); 3216 %} 3217 3218 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3219 predicate(UseAVX > 0); 3220 match(Set dst (SubF src1 src2)); 3221 3222 format %{ "vsubss $dst, $src1, $src2" %} 3223 ins_cost(150); 3224 ins_encode %{ 3225 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3226 %} 3227 ins_pipe(pipe_slow); 3228 %} 3229 3230 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3231 predicate(UseAVX > 0); 3232 match(Set dst (SubF src1 (LoadF src2))); 3233 3234 format %{ "vsubss $dst, $src1, $src2" %} 3235 ins_cost(150); 3236 ins_encode %{ 3237 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3238 %} 3239 ins_pipe(pipe_slow); 3240 %} 3241 3242 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3243 predicate(UseAVX > 0); 3244 match(Set dst (SubF src con)); 3245 3246 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3247 ins_cost(150); 3248 ins_encode %{ 3249 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3250 %} 3251 ins_pipe(pipe_slow); 3252 %} 3253 3254 instruct subD_reg(regD dst, regD src) %{ 3255 predicate((UseSSE>=2) && (UseAVX == 0)); 3256 match(Set dst (SubD dst src)); 3257 3258 format %{ "subsd $dst, $src" %} 3259 ins_cost(150); 3260 ins_encode %{ 3261 __ subsd($dst$$XMMRegister, 
$src$$XMMRegister); 3262 %} 3263 ins_pipe(pipe_slow); 3264 %} 3265 3266 instruct subD_mem(regD dst, memory src) %{ 3267 predicate((UseSSE>=2) && (UseAVX == 0)); 3268 match(Set dst (SubD dst (LoadD src))); 3269 3270 format %{ "subsd $dst, $src" %} 3271 ins_cost(150); 3272 ins_encode %{ 3273 __ subsd($dst$$XMMRegister, $src$$Address); 3274 %} 3275 ins_pipe(pipe_slow); 3276 %} 3277 3278 instruct subD_imm(regD dst, immD con) %{ 3279 predicate((UseSSE>=2) && (UseAVX == 0)); 3280 match(Set dst (SubD dst con)); 3281 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3282 ins_cost(150); 3283 ins_encode %{ 3284 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3285 %} 3286 ins_pipe(pipe_slow); 3287 %} 3288 3289 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3290 predicate(UseAVX > 0); 3291 match(Set dst (SubD src1 src2)); 3292 3293 format %{ "vsubsd $dst, $src1, $src2" %} 3294 ins_cost(150); 3295 ins_encode %{ 3296 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3297 %} 3298 ins_pipe(pipe_slow); 3299 %} 3300 3301 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3302 predicate(UseAVX > 0); 3303 match(Set dst (SubD src1 (LoadD src2))); 3304 3305 format %{ "vsubsd $dst, $src1, $src2" %} 3306 ins_cost(150); 3307 ins_encode %{ 3308 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3309 %} 3310 ins_pipe(pipe_slow); 3311 %} 3312 3313 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3314 predicate(UseAVX > 0); 3315 match(Set dst (SubD src con)); 3316 3317 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3318 ins_cost(150); 3319 ins_encode %{ 3320 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3321 %} 3322 ins_pipe(pipe_slow); 3323 %} 3324 3325 instruct mulF_reg(regF dst, regF src) %{ 3326 predicate((UseSSE>=1) && (UseAVX == 0)); 3327 match(Set dst (MulF dst src)); 3328 3329 format %{ "mulss $dst, $src" %} 3330 ins_cost(150); 3331 ins_encode %{ 3332 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3333 %} 3334 ins_pipe(pipe_slow); 3335 %} 3336 3337 instruct mulF_mem(regF dst, memory src) %{ 3338 predicate((UseSSE>=1) && (UseAVX == 0)); 3339 match(Set dst (MulF dst (LoadF src))); 3340 3341 format %{ "mulss $dst, $src" %} 3342 ins_cost(150); 3343 ins_encode %{ 3344 __ mulss($dst$$XMMRegister, $src$$Address); 3345 %} 3346 ins_pipe(pipe_slow); 3347 %} 3348 3349 instruct mulF_imm(regF dst, immF con) %{ 3350 predicate((UseSSE>=1) && (UseAVX == 0)); 3351 match(Set dst (MulF dst con)); 3352 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3353 ins_cost(150); 3354 ins_encode %{ 3355 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3356 %} 3357 ins_pipe(pipe_slow); 3358 %} 3359 3360 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3361 predicate(UseAVX > 0); 3362 match(Set dst (MulF src1 src2)); 3363 3364 format %{ "vmulss $dst, $src1, $src2" %} 3365 ins_cost(150); 3366 ins_encode %{ 3367 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3368 %} 3369 ins_pipe(pipe_slow); 3370 %} 3371 3372 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3373 predicate(UseAVX > 0); 3374 match(Set dst (MulF src1 (LoadF src2))); 3375 3376 format %{ "vmulss $dst, $src1, $src2" %} 3377 ins_cost(150); 3378 ins_encode %{ 3379 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3380 %} 3381 ins_pipe(pipe_slow); 3382 %} 3383 3384 instruct mulF_reg_imm(regF dst, regF src, immF con) 
%{ 3385 predicate(UseAVX > 0); 3386 match(Set dst (MulF src con)); 3387 3388 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3389 ins_cost(150); 3390 ins_encode %{ 3391 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3392 %} 3393 ins_pipe(pipe_slow); 3394 %} 3395 3396 instruct mulD_reg(regD dst, regD src) %{ 3397 predicate((UseSSE>=2) && (UseAVX == 0)); 3398 match(Set dst (MulD dst src)); 3399 3400 format %{ "mulsd $dst, $src" %} 3401 ins_cost(150); 3402 ins_encode %{ 3403 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3404 %} 3405 ins_pipe(pipe_slow); 3406 %} 3407 3408 instruct mulD_mem(regD dst, memory src) %{ 3409 predicate((UseSSE>=2) && (UseAVX == 0)); 3410 match(Set dst (MulD dst (LoadD src))); 3411 3412 format %{ "mulsd $dst, $src" %} 3413 ins_cost(150); 3414 ins_encode %{ 3415 __ mulsd($dst$$XMMRegister, $src$$Address); 3416 %} 3417 ins_pipe(pipe_slow); 3418 %} 3419 3420 instruct mulD_imm(regD dst, immD con) %{ 3421 predicate((UseSSE>=2) && (UseAVX == 0)); 3422 match(Set dst (MulD dst con)); 3423 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3424 ins_cost(150); 3425 ins_encode %{ 3426 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3427 %} 3428 ins_pipe(pipe_slow); 3429 %} 3430 3431 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3432 predicate(UseAVX > 0); 3433 match(Set dst (MulD src1 src2)); 3434 3435 format %{ "vmulsd $dst, $src1, $src2" %} 3436 ins_cost(150); 3437 ins_encode %{ 3438 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3439 %} 3440 ins_pipe(pipe_slow); 3441 %} 3442 3443 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3444 predicate(UseAVX > 0); 3445 match(Set dst (MulD src1 (LoadD src2))); 3446 3447 format %{ "vmulsd $dst, $src1, $src2" %} 3448 ins_cost(150); 3449 ins_encode %{ 3450 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3451 %} 3452 ins_pipe(pipe_slow); 3453 %} 3454 3455 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3456 predicate(UseAVX > 0); 3457 match(Set dst (MulD src con)); 3458 3459 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3460 ins_cost(150); 3461 ins_encode %{ 3462 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3463 %} 3464 ins_pipe(pipe_slow); 3465 %} 3466 3467 instruct divF_reg(regF dst, regF src) %{ 3468 predicate((UseSSE>=1) && (UseAVX == 0)); 3469 match(Set dst (DivF dst src)); 3470 3471 format %{ "divss $dst, $src" %} 3472 ins_cost(150); 3473 ins_encode %{ 3474 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3475 %} 3476 ins_pipe(pipe_slow); 3477 %} 3478 3479 instruct divF_mem(regF dst, memory src) %{ 3480 predicate((UseSSE>=1) && (UseAVX == 0)); 3481 match(Set dst (DivF dst (LoadF src))); 3482 3483 format %{ "divss $dst, $src" %} 3484 ins_cost(150); 3485 ins_encode %{ 3486 __ divss($dst$$XMMRegister, $src$$Address); 3487 %} 3488 ins_pipe(pipe_slow); 3489 %} 3490 3491 instruct divF_imm(regF dst, immF con) %{ 3492 predicate((UseSSE>=1) && (UseAVX == 0)); 3493 match(Set dst (DivF dst con)); 3494 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3495 ins_cost(150); 3496 ins_encode %{ 3497 __ divss($dst$$XMMRegister, $constantaddress($con)); 3498 %} 3499 ins_pipe(pipe_slow); 3500 %} 3501 3502 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3503 predicate(UseAVX > 0); 3504 match(Set dst (DivF src1 src2)); 3505 3506 format %{ "vdivss $dst, 
$src1, $src2" %} 3507 ins_cost(150); 3508 ins_encode %{ 3509 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3510 %} 3511 ins_pipe(pipe_slow); 3512 %} 3513 3514 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3515 predicate(UseAVX > 0); 3516 match(Set dst (DivF src1 (LoadF src2))); 3517 3518 format %{ "vdivss $dst, $src1, $src2" %} 3519 ins_cost(150); 3520 ins_encode %{ 3521 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3522 %} 3523 ins_pipe(pipe_slow); 3524 %} 3525 3526 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3527 predicate(UseAVX > 0); 3528 match(Set dst (DivF src con)); 3529 3530 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3531 ins_cost(150); 3532 ins_encode %{ 3533 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3534 %} 3535 ins_pipe(pipe_slow); 3536 %} 3537 3538 instruct divD_reg(regD dst, regD src) %{ 3539 predicate((UseSSE>=2) && (UseAVX == 0)); 3540 match(Set dst (DivD dst src)); 3541 3542 format %{ "divsd $dst, $src" %} 3543 ins_cost(150); 3544 ins_encode %{ 3545 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3546 %} 3547 ins_pipe(pipe_slow); 3548 %} 3549 3550 instruct divD_mem(regD dst, memory src) %{ 3551 predicate((UseSSE>=2) && (UseAVX == 0)); 3552 match(Set dst (DivD dst (LoadD src))); 3553 3554 format %{ "divsd $dst, $src" %} 3555 ins_cost(150); 3556 ins_encode %{ 3557 __ divsd($dst$$XMMRegister, $src$$Address); 3558 %} 3559 ins_pipe(pipe_slow); 3560 %} 3561 3562 instruct divD_imm(regD dst, immD con) %{ 3563 predicate((UseSSE>=2) && (UseAVX == 0)); 3564 match(Set dst (DivD dst con)); 3565 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3566 ins_cost(150); 3567 ins_encode %{ 3568 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3569 %} 3570 ins_pipe(pipe_slow); 3571 %} 3572 3573 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3574 predicate(UseAVX > 0); 3575 match(Set dst (DivD src1 src2)); 3576 3577 format %{ "vdivsd $dst, $src1, $src2" %} 3578 ins_cost(150); 3579 ins_encode %{ 3580 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3581 %} 3582 ins_pipe(pipe_slow); 3583 %} 3584 3585 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3586 predicate(UseAVX > 0); 3587 match(Set dst (DivD src1 (LoadD src2))); 3588 3589 format %{ "vdivsd $dst, $src1, $src2" %} 3590 ins_cost(150); 3591 ins_encode %{ 3592 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3593 %} 3594 ins_pipe(pipe_slow); 3595 %} 3596 3597 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3598 predicate(UseAVX > 0); 3599 match(Set dst (DivD src con)); 3600 3601 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3602 ins_cost(150); 3603 ins_encode %{ 3604 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3605 %} 3606 ins_pipe(pipe_slow); 3607 %} 3608 3609 instruct absF_reg(regF dst) %{ 3610 predicate((UseSSE>=1) && (UseAVX == 0)); 3611 match(Set dst (AbsF dst)); 3612 ins_cost(150); 3613 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3614 ins_encode %{ 3615 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3616 %} 3617 ins_pipe(pipe_slow); 3618 %} 3619 3620 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3621 predicate(UseAVX > 0); 3622 match(Set dst (AbsF src)); 3623 ins_cost(150); 3624 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3625 ins_encode 
%{ 3626 int vlen_enc = Assembler::AVX_128bit; 3627 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3628 ExternalAddress(float_signmask()), vlen_enc); 3629 %} 3630 ins_pipe(pipe_slow); 3631 %} 3632 3633 instruct absD_reg(regD dst) %{ 3634 predicate((UseSSE>=2) && (UseAVX == 0)); 3635 match(Set dst (AbsD dst)); 3636 ins_cost(150); 3637 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3638 "# abs double by sign masking" %} 3639 ins_encode %{ 3640 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3641 %} 3642 ins_pipe(pipe_slow); 3643 %} 3644 3645 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3646 predicate(UseAVX > 0); 3647 match(Set dst (AbsD src)); 3648 ins_cost(150); 3649 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3650 "# abs double by sign masking" %} 3651 ins_encode %{ 3652 int vlen_enc = Assembler::AVX_128bit; 3653 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3654 ExternalAddress(double_signmask()), vlen_enc); 3655 %} 3656 ins_pipe(pipe_slow); 3657 %} 3658 3659 instruct negF_reg(regF dst) %{ 3660 predicate((UseSSE>=1) && (UseAVX == 0)); 3661 match(Set dst (NegF dst)); 3662 ins_cost(150); 3663 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3664 ins_encode %{ 3665 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3666 %} 3667 ins_pipe(pipe_slow); 3668 %} 3669 3670 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3671 predicate(UseAVX > 0); 3672 match(Set dst (NegF src)); 3673 ins_cost(150); 3674 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3675 ins_encode %{ 3676 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3677 ExternalAddress(float_signflip())); 3678 %} 3679 ins_pipe(pipe_slow); 3680 %} 3681 3682 instruct negD_reg(regD dst) %{ 3683 predicate((UseSSE>=2) && (UseAVX == 0)); 3684 match(Set dst (NegD dst)); 3685 ins_cost(150); 3686 format %{ "xorpd $dst, [0x8000000000000000]\t" 3687 "# neg double by sign flipping" %} 3688 ins_encode %{ 3689 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3690 %} 3691 ins_pipe(pipe_slow); 3692 %} 3693 3694 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3695 predicate(UseAVX > 0); 3696 match(Set dst (NegD src)); 3697 ins_cost(150); 3698 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3699 "# neg double by sign flipping" %} 3700 ins_encode %{ 3701 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3702 ExternalAddress(double_signflip())); 3703 %} 3704 ins_pipe(pipe_slow); 3705 %} 3706 3707 // sqrtss instruction needs destination register to be pre initialized for best performance 3708 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3709 instruct sqrtF_reg(regF dst) %{ 3710 predicate(UseSSE>=1); 3711 match(Set dst (SqrtF dst)); 3712 format %{ "sqrtss $dst, $dst" %} 3713 ins_encode %{ 3714 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3715 %} 3716 ins_pipe(pipe_slow); 3717 %} 3718 3719 // sqrtsd instruction needs destination register to be pre initialized for best performance 3720 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3721 instruct sqrtD_reg(regD dst) %{ 3722 predicate(UseSSE>=2); 3723 match(Set dst (SqrtD dst)); 3724 format %{ "sqrtsd $dst, $dst" %} 3725 ins_encode %{ 3726 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3727 %} 3728 ins_pipe(pipe_slow); 3729 %} 3730 3731 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3732 effect(TEMP tmp); 3733 match(Set dst (ConvF2HF src)); 3734 
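// Rough sketch of the conversion (an assumption for illustration; the exact sequence lives in flt_to_flt16 in the macro assembler and may differ): vcvtps2ph rounds $src to a packed half-float in $tmp, then the low 16 bits are moved into the integer register $dst.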
ins_cost(125); 3735 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3736 ins_encode %{ 3737 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3738 %} 3739 ins_pipe( pipe_slow ); 3740 %} 3741 3742 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3743 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3744 effect(TEMP ktmp, TEMP rtmp); 3745 match(Set mem (StoreC mem (ConvF2HF src))); 3746 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3747 ins_encode %{ 3748 __ movl($rtmp$$Register, 0x1); 3749 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3750 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3751 %} 3752 ins_pipe( pipe_slow ); 3753 %} 3754 3755 instruct vconvF2HF(vec dst, vec src) %{ 3756 match(Set dst (VectorCastF2HF src)); 3757 format %{ "vector_conv_F2HF $dst $src" %} 3758 ins_encode %{ 3759 int vlen_enc = vector_length_encoding(this, $src); 3760 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3766 predicate(n->as_StoreVector()->memory_size() >= 16); 3767 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3768 format %{ "vcvtps2ph $mem,$src" %} 3769 ins_encode %{ 3770 int vlen_enc = vector_length_encoding(this, $src); 3771 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3772 %} 3773 ins_pipe( pipe_slow ); 3774 %} 3775 3776 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3777 match(Set dst (ConvHF2F src)); 3778 format %{ "vcvtph2ps $dst,$src" %} 3779 ins_encode %{ 3780 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3781 %} 3782 ins_pipe( pipe_slow ); 3783 %} 3784 3785 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3786 match(Set dst (VectorCastHF2F (LoadVector mem))); 3787 format %{ "vcvtph2ps $dst,$mem" %} 3788 ins_encode %{ 3789 int vlen_enc = vector_length_encoding(this); 3790 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3791 %} 3792 ins_pipe( pipe_slow ); 3793 %} 3794 3795 instruct vconvHF2F(vec dst, vec src) %{ 3796 match(Set dst (VectorCastHF2F src)); 3797 ins_cost(125); 3798 format %{ "vector_conv_HF2F $dst,$src" %} 3799 ins_encode %{ 3800 int vlen_enc = vector_length_encoding(this); 3801 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3802 %} 3803 ins_pipe( pipe_slow ); 3804 %} 3805 3806 // ---------------------------------------- VectorReinterpret ------------------------------------ 3807 instruct reinterpret_mask(kReg dst) %{ 3808 predicate(n->bottom_type()->isa_vectmask() && 3809 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3810 match(Set dst (VectorReinterpret dst)); 3811 ins_cost(125); 3812 format %{ "vector_reinterpret $dst\t!" %} 3813 ins_encode %{ 3814 // empty 3815 %} 3816 ins_pipe( pipe_slow ); 3817 %} 3818 3819 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3820 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3821 n->bottom_type()->isa_vectmask() && 3822 n->in(1)->bottom_type()->isa_vectmask() && 3823 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3824 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3825 match(Set dst (VectorReinterpret src)); 3826 effect(TEMP xtmp); 3827 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3828 ins_encode %{ 3829 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3830 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3831 assert(src_sz == dst_sz , "src and dst size mismatch"); 3832 int vlen_enc = vector_length_encoding(src_sz); 3833 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3834 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3835 %} 3836 ins_pipe( pipe_slow ); 3837 %} 3838 3839 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3840 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3841 n->bottom_type()->isa_vectmask() && 3842 n->in(1)->bottom_type()->isa_vectmask() && 3843 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3844 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3845 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3846 match(Set dst (VectorReinterpret src)); 3847 effect(TEMP xtmp); 3848 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3849 ins_encode %{ 3850 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3851 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3852 assert(src_sz == dst_sz , "src and dst size mismatch"); 3853 int vlen_enc = vector_length_encoding(src_sz); 3854 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3855 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3856 %} 3857 ins_pipe( pipe_slow ); 3858 %} 3859 3860 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3861 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3862 n->bottom_type()->isa_vectmask() && 3863 n->in(1)->bottom_type()->isa_vectmask() && 3864 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3865 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3866 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3867 match(Set dst (VectorReinterpret src)); 3868 effect(TEMP xtmp); 3869 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3870 ins_encode %{ 3871 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3872 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3873 assert(src_sz == dst_sz , "src and dst size mismatch"); 3874 int vlen_enc = vector_length_encoding(src_sz); 3875 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3876 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3877 %} 3878 ins_pipe( pipe_slow ); 3879 %} 3880 3881 instruct reinterpret(vec dst) %{ 3882 predicate(!n->bottom_type()->isa_vectmask() && 3883 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3884 match(Set dst (VectorReinterpret dst)); 3885 ins_cost(125); 3886 format %{ "vector_reinterpret $dst\t!" 
%} 3887 ins_encode %{ 3888 // empty 3889 %} 3890 ins_pipe( pipe_slow ); 3891 %} 3892 3893 instruct reinterpret_expand(vec dst, vec src) %{ 3894 predicate(UseAVX == 0 && 3895 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3896 match(Set dst (VectorReinterpret src)); 3897 ins_cost(125); 3898 effect(TEMP dst); 3899 format %{ "vector_reinterpret_expand $dst,$src" %} 3900 ins_encode %{ 3901 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3902 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3903 3904 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3905 if (src_vlen_in_bytes == 4) { 3906 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3907 } else { 3908 assert(src_vlen_in_bytes == 8, ""); 3909 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3910 } 3911 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3912 %} 3913 ins_pipe( pipe_slow ); 3914 %} 3915 3916 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3917 predicate(UseAVX > 0 && 3918 !n->bottom_type()->isa_vectmask() && 3919 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3920 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3921 match(Set dst (VectorReinterpret src)); 3922 ins_cost(125); 3923 format %{ "vector_reinterpret_expand $dst,$src" %} 3924 ins_encode %{ 3925 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3926 %} 3927 ins_pipe( pipe_slow ); 3928 %} 3929 3930 3931 instruct vreinterpret_expand(legVec dst, vec src) %{ 3932 predicate(UseAVX > 0 && 3933 !n->bottom_type()->isa_vectmask() && 3934 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3935 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3936 match(Set dst (VectorReinterpret src)); 3937 ins_cost(125); 3938 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3939 ins_encode %{ 3940 switch (Matcher::vector_length_in_bytes(this, $src)) { 3941 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3942 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3943 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3944 default: ShouldNotReachHere(); 3945 } 3946 %} 3947 ins_pipe( pipe_slow ); 3948 %} 3949 3950 instruct reinterpret_shrink(vec dst, legVec src) %{ 3951 predicate(!n->bottom_type()->isa_vectmask() && 3952 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3953 match(Set dst (VectorReinterpret src)); 3954 ins_cost(125); 3955 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3956 ins_encode %{ 3957 switch (Matcher::vector_length_in_bytes(this)) { 3958 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3959 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3960 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3961 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3962 default: ShouldNotReachHere(); 3963 } 3964 %} 3965 ins_pipe( pipe_slow ); 3966 %} 3967 3968 // ---------------------------------------------------------------------------------------------------- 3969 3970 #ifdef _LP64 3971 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3972 match(Set dst (RoundDoubleMode src rmode)); 3973 format %{ "roundsd $dst,$src" %} 3974 ins_cost(150); 3975 ins_encode %{ 3976 assert(UseSSE >= 4, "required"); 3977 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3978 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3979 } 3980 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3981 %} 3982 ins_pipe(pipe_slow); 3983 %} 3984 3985 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3986 match(Set dst (RoundDoubleMode con rmode)); 3987 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3988 ins_cost(150); 3989 ins_encode %{ 3990 assert(UseSSE >= 4, "required"); 3991 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3992 %} 3993 ins_pipe(pipe_slow); 3994 %} 3995 3996 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3997 predicate(Matcher::vector_length(n) < 8); 3998 match(Set dst (RoundDoubleModeV src rmode)); 3999 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 4000 ins_encode %{ 4001 assert(UseAVX > 0, "required"); 4002 int vlen_enc = vector_length_encoding(this); 4003 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 4004 %} 4005 ins_pipe( pipe_slow ); 4006 %} 4007 4008 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 4009 predicate(Matcher::vector_length(n) == 8); 4010 match(Set dst (RoundDoubleModeV src rmode)); 4011 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 4012 ins_encode %{ 4013 assert(UseAVX > 2, "required"); 4014 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 4019 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 4020 predicate(Matcher::vector_length(n) < 8); 4021 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4022 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 4023 ins_encode %{ 4024 assert(UseAVX > 0, "required"); 4025 int vlen_enc = vector_length_encoding(this); 4026 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 4027 %} 4028 ins_pipe( pipe_slow ); 4029 %} 4030 4031 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 4032 predicate(Matcher::vector_length(n) == 8); 4033 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4034 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 4035 ins_encode %{ 4036 assert(UseAVX > 2, "required"); 4037 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 4038 %} 4039 ins_pipe( pipe_slow ); 4040 %} 4041 #endif // _LP64 4042 4043 instruct onspinwait() %{ 4044 match(OnSpinWait); 4045 ins_cost(200); 4046 4047 format %{ 4048 $$template 4049 $$emit$$"pause\t! 
membar_onspinwait" 4050 %} 4051 ins_encode %{ 4052 __ pause(); 4053 %} 4054 ins_pipe(pipe_slow); 4055 %} 4056 4057 // a * b + c 4058 instruct fmaD_reg(regD a, regD b, regD c) %{ 4059 match(Set c (FmaD c (Binary a b))); 4060 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4061 ins_cost(150); 4062 ins_encode %{ 4063 assert(UseFMA, "Needs FMA instructions support."); 4064 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4065 %} 4066 ins_pipe( pipe_slow ); 4067 %} 4068 4069 // a * b + c 4070 instruct fmaF_reg(regF a, regF b, regF c) %{ 4071 match(Set c (FmaF c (Binary a b))); 4072 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4073 ins_cost(150); 4074 ins_encode %{ 4075 assert(UseFMA, "Needs FMA instructions support."); 4076 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4077 %} 4078 ins_pipe( pipe_slow ); 4079 %} 4080 4081 // ====================VECTOR INSTRUCTIONS===================================== 4082 4083 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4084 instruct MoveVec2Leg(legVec dst, vec src) %{ 4085 match(Set dst src); 4086 format %{ "" %} 4087 ins_encode %{ 4088 ShouldNotReachHere(); 4089 %} 4090 ins_pipe( fpu_reg_reg ); 4091 %} 4092 4093 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4094 match(Set dst src); 4095 format %{ "" %} 4096 ins_encode %{ 4097 ShouldNotReachHere(); 4098 %} 4099 ins_pipe( fpu_reg_reg ); 4100 %} 4101 4102 // ============================================================================ 4103 4104 // Load vectors generic operand pattern 4105 instruct loadV(vec dst, memory mem) %{ 4106 match(Set dst (LoadVector mem)); 4107 ins_cost(125); 4108 format %{ "load_vector $dst,$mem" %} 4109 ins_encode %{ 4110 BasicType bt = Matcher::vector_element_basic_type(this); 4111 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4112 %} 4113 ins_pipe( pipe_slow ); 4114 %} 4115 4116 // Store vectors generic operand pattern. 4117 instruct storeV(memory mem, vec src) %{ 4118 match(Set mem (StoreVector mem src)); 4119 ins_cost(145); 4120 format %{ "store_vector $mem,$src\n\t" %} 4121 ins_encode %{ 4122 switch (Matcher::vector_length_in_bytes(this, $src)) { 4123 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4124 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4125 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4126 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4127 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4128 default: ShouldNotReachHere(); 4129 } 4130 %} 4131 ins_pipe( pipe_slow ); 4132 %} 4133 4134 // ---------------------------------------- Gather ------------------------------------ 4135 4136 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4137 4138 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4139 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4140 Matcher::vector_length_in_bytes(n) <= 32); 4141 match(Set dst (LoadVectorGather mem idx)); 4142 effect(TEMP dst, TEMP tmp, TEMP mask); 4143 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4144 ins_encode %{ 4145 int vlen_enc = vector_length_encoding(this); 4146 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4147 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4148 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4149 __ lea($tmp$$Register, $mem$$Address); 4150 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4151 %} 4152 ins_pipe( pipe_slow ); 4153 %} 4154 4155 4156 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4157 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4158 !is_subword_type(Matcher::vector_element_basic_type(n))); 4159 match(Set dst (LoadVectorGather mem idx)); 4160 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4161 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %} 4162 ins_encode %{ 4163 int vlen_enc = vector_length_encoding(this); 4164 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4165 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4166 __ lea($tmp$$Register, $mem$$Address); 4167 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4168 %} 4169 ins_pipe( pipe_slow ); 4170 %} 4171 4172 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4173 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4174 !is_subword_type(Matcher::vector_element_basic_type(n))); 4175 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4176 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4177 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %} 4178 ins_encode %{ 4179 assert(UseAVX > 2, "sanity"); 4180 int vlen_enc = vector_length_encoding(this); 4181 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4182 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4183 // Note: Since the gather instruction partially updates the opmask register used 4184 // for predication, the mask operand is first moved to a temporary. 4185 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4186 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4187 __ lea($tmp$$Register, $mem$$Address); 4188 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4189 %} 4190 ins_pipe( pipe_slow ); 4191 %} 4192 4193 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4194 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4195 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4196 effect(TEMP tmp, TEMP rtmp); 4197 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4198 ins_encode %{ 4199 int vlen_enc = vector_length_encoding(this); 4200 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4201 __ lea($tmp$$Register, $mem$$Address); 4202 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4203 %} 4204 ins_pipe( pipe_slow ); 4205 %} 4206 4207 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4208 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4209 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4210 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4211 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4212 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4213 ins_encode %{ 4214 int vlen_enc = vector_length_encoding(this); 4215 int vector_len = Matcher::vector_length(this); 4216 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4217 __ lea($tmp$$Register, $mem$$Address); 4218 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4219 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4220 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4221 %} 4222 ins_pipe( pipe_slow ); 4223 %} 4224 4225 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4226 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4227 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4228 effect(TEMP tmp, TEMP rtmp, KILL cr); 4229 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4230 ins_encode %{ 4231 int vlen_enc = vector_length_encoding(this); 4232 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4233 __ lea($tmp$$Register, $mem$$Address); 4234 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4235 %} 4236 ins_pipe( pipe_slow ); 4237 %} 4238 4239 4240 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4241 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4242 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4243 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4244 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4245 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4246 ins_encode %{ 4247 int vlen_enc = vector_length_encoding(this); 4248 int vector_len = Matcher::vector_length(this); 4249 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4250 __ lea($tmp$$Register, $mem$$Address); 4251 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4252 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4253 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 4259 #ifdef _LP64 4260 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4261 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4262 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4263 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4264 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4265 ins_encode %{ 4266 int vlen_enc = vector_length_encoding(this); 4267 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4268 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4269 __ lea($tmp$$Register, $mem$$Address); 4270 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4271 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4272 %} 4273 ins_pipe( pipe_slow ); 4274 %} 4275 4276 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4277 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4278 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4279 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4280 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4281 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4282 ins_encode %{ 4283 int vlen_enc = vector_length_encoding(this); 4284 int vector_len = Matcher::vector_length(this); 4285 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4286 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4287 __ lea($tmp$$Register, $mem$$Address); 4288 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4289 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4290 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4291 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4292 %} 4293 ins_pipe( pipe_slow ); 4294 %} 4295 4296 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4297 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4298 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4299 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4300 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4301 ins_encode %{ 4302 int vlen_enc = vector_length_encoding(this); 4303 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4304 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4305 __ lea($tmp$$Register, $mem$$Address); 4306 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4307 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4308 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4309 %} 4310 ins_pipe( pipe_slow ); 4311 %} 4312 4313 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4314 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4315 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4316 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4317 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4318 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4319 ins_encode %{ 4320 int vlen_enc = vector_length_encoding(this); 4321 int vector_len = Matcher::vector_length(this); 4322 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4323 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4324 __ lea($tmp$$Register, $mem$$Address); 4325 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4326 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4327 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4328 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4329 %} 4330 ins_pipe( pipe_slow ); 4331 %} 4332 4333 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4334 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4335 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4336 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4337 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4338 ins_encode %{ 4339 int vlen_enc = vector_length_encoding(this); 4340 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4341 __ lea($tmp$$Register, $mem$$Address); 4342 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4343 if (elem_bt == T_SHORT) { 4344 __ movl($mask_idx$$Register, 0x55555555); 4345 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4346 } 4347 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4348 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4349 %} 4350 ins_pipe( pipe_slow ); 4351 %} 4352 4353 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4354 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4355 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4356 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4357 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4358 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4359 ins_encode %{ 4360 int vlen_enc = vector_length_encoding(this); 4361 int vector_len = Matcher::vector_length(this); 4362 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4363 __ lea($tmp$$Register, $mem$$Address); 4364 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4365 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4366 if (elem_bt == T_SHORT) { 4367 __ movl($mask_idx$$Register, 0x55555555); 4368 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4369 } 4370 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4371 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4372 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4373 %} 4374 ins_pipe( pipe_slow ); 4375 %} 4376 4377 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4378 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4379 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4380 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4381 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4382 ins_encode %{ 4383 int vlen_enc = vector_length_encoding(this); 4384 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4385 __ lea($tmp$$Register, $mem$$Address); 4386 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4387 if (elem_bt == T_SHORT) { 4388 __ movl($mask_idx$$Register, 0x55555555); 4389 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4390 } 4391 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4392 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4393 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 4398 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4399 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4400 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4401 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4402 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4403 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4404 ins_encode %{ 4405 int vlen_enc = vector_length_encoding(this); 4406 int vector_len = Matcher::vector_length(this); 4407 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4408 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4409 __ lea($tmp$$Register, $mem$$Address); 4410 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4411 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4412 if (elem_bt == T_SHORT) { 4413 __ movl($mask_idx$$Register, 0x55555555); 4414 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4415 } 4416 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4417 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4418 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4419 %} 4420 ins_pipe( pipe_slow ); 4421 %} 4422 #endif 4423 4424 // ====================Scatter======================================= 4425 4426 // Scatter INT, LONG, FLOAT, DOUBLE 4427 4428 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4429 predicate(UseAVX > 2); 4430 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4431 effect(TEMP tmp, TEMP ktmp); 4432 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4433 ins_encode %{ 4434 int vlen_enc = vector_length_encoding(this, $src); 4435 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4436 4437 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4438 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4439 4440 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4441 __ lea($tmp$$Register, $mem$$Address); 4442 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4443 %} 4444 ins_pipe( pipe_slow ); 4445 %} 4446 4447 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4448 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4449 effect(TEMP tmp, TEMP ktmp); 4450 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4451 ins_encode %{ 4452 int vlen_enc = vector_length_encoding(this, $src); 4453 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4454 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4455 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4456 // Note: Since the scatter instruction partially updates the opmask register used 4457 // for predication, the mask operand is first moved to a temporary.
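// Illustrative semantics (an assumption for clarity; the actual addressing is implemented by evscatter in the C2 macro assembler): for a T_INT scatter with mask 0b00001111, only lanes 0-3 of $src are stored, each at the base address in $tmp plus the matching $idx lane scaled by the element size; lanes whose mask bit is clear leave memory untouched.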
4458 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4459 __ lea($tmp$$Register, $mem$$Address); 4460 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4461 %} 4462 ins_pipe( pipe_slow ); 4463 %} 4464 4465 // ====================REPLICATE======================================= 4466 4467 // Replicate byte scalar to be vector 4468 instruct vReplB_reg(vec dst, rRegI src) %{ 4469 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4470 match(Set dst (Replicate src)); 4471 format %{ "replicateB $dst,$src" %} 4472 ins_encode %{ 4473 uint vlen = Matcher::vector_length(this); 4474 if (UseAVX >= 2) { 4475 int vlen_enc = vector_length_encoding(this); 4476 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4477 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4478 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4479 } else { 4480 __ movdl($dst$$XMMRegister, $src$$Register); 4481 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4482 } 4483 } else { 4484 assert(UseAVX < 2, ""); 4485 __ movdl($dst$$XMMRegister, $src$$Register); 4486 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4487 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4488 if (vlen >= 16) { 4489 assert(vlen == 16, ""); 4490 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4491 } 4492 } 4493 %} 4494 ins_pipe( pipe_slow ); 4495 %} 4496 4497 instruct ReplB_mem(vec dst, memory mem) %{ 4498 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4499 match(Set dst (Replicate (LoadB mem))); 4500 format %{ "replicateB $dst,$mem" %} 4501 ins_encode %{ 4502 int vlen_enc = vector_length_encoding(this); 4503 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4504 %} 4505 ins_pipe( pipe_slow ); 4506 %} 4507 4508 // ====================ReplicateS======================================= 4509 4510 instruct vReplS_reg(vec dst, rRegI src) %{ 4511 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4512 match(Set dst (Replicate src)); 4513 format %{ "replicateS $dst,$src" %} 4514 ins_encode %{ 4515 uint vlen = Matcher::vector_length(this); 4516 int vlen_enc = vector_length_encoding(this); 4517 if (UseAVX >= 2) { 4518 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4519 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4520 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4521 } else { 4522 __ movdl($dst$$XMMRegister, $src$$Register); 4523 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4524 } 4525 } else { 4526 assert(UseAVX < 2, ""); 4527 __ movdl($dst$$XMMRegister, $src$$Register); 4528 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4529 if (vlen >= 8) { 4530 assert(vlen == 8, ""); 4531 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4532 } 4533 } 4534 %} 4535 ins_pipe( pipe_slow ); 4536 %} 4537 4538 #ifdef _LP64 4539 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4540 match(Set dst (Replicate con)); 4541 effect(TEMP rtmp); 4542 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4543 ins_encode %{ 4544 int vlen_enc = vector_length_encoding(this); 4545 BasicType bt = Matcher::vector_element_basic_type(this); 4546 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4547 __ movl($rtmp$$Register, $con$$constant); 4548 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4549 %} 4550 ins_pipe( pipe_slow ); 4551 %} 4552 4553 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4554 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4555 match(Set dst (Replicate src)); 4556 effect(TEMP rtmp); 4557 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4558 ins_encode %{ 4559 int vlen_enc = vector_length_encoding(this); 4560 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4561 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4562 %} 4563 ins_pipe( pipe_slow ); 4564 %} 4565 #endif 4566 4567 instruct ReplS_mem(vec dst, memory mem) %{ 4568 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4569 match(Set dst (Replicate (LoadS mem))); 4570 format %{ "replicateS $dst,$mem" %} 4571 ins_encode %{ 4572 int vlen_enc = vector_length_encoding(this); 4573 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4574 %} 4575 ins_pipe( pipe_slow ); 4576 %} 4577 4578 // ====================ReplicateI======================================= 4579 4580 instruct ReplI_reg(vec dst, rRegI src) %{ 4581 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4582 match(Set dst (Replicate src)); 4583 format %{ "replicateI $dst,$src" %} 4584 ins_encode %{ 4585 uint vlen = Matcher::vector_length(this); 4586 int vlen_enc = vector_length_encoding(this); 4587 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4588 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4589 } else if (VM_Version::supports_avx2()) { 4590 __ movdl($dst$$XMMRegister, $src$$Register); 4591 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4592 } else { 4593 __ movdl($dst$$XMMRegister, $src$$Register); 4594 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4595 } 4596 %} 4597 ins_pipe( pipe_slow ); 4598 %} 4599 4600 instruct ReplI_mem(vec dst, memory mem) %{ 4601 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4602 match(Set dst (Replicate (LoadI mem))); 4603 format %{ "replicateI $dst,$mem" %} 4604 ins_encode %{ 4605 int vlen_enc = vector_length_encoding(this); 4606 if (VM_Version::supports_avx2()) { 4607 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4608 } else if (VM_Version::supports_avx()) { 4609 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4610 } else { 4611 __ movdl($dst$$XMMRegister, $mem$$Address); 4612 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4613 } 4614 %} 4615 ins_pipe( pipe_slow ); 4616 %} 4617 4618 instruct ReplI_imm(vec dst, immI con) %{ 4619 predicate(Matcher::is_non_long_integral_vector(n)); 4620 match(Set dst (Replicate con)); 4621 format %{ "replicateI $dst,$con" %} 4622 ins_encode %{ 4623 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4624 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4625 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4626 BasicType bt = Matcher::vector_element_basic_type(this); 4627 int vlen = Matcher::vector_length_in_bytes(this); 4628 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4629 %} 4630 ins_pipe( pipe_slow ); 4631 %} 4632 4633 // Replicate scalar zero to be vector 4634 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4635 predicate(Matcher::is_non_long_integral_vector(n)); 4636 match(Set dst (Replicate zero)); 4637 format %{ "replicateI $dst,$zero" %} 4638 ins_encode %{ 4639 int vlen_enc = vector_length_encoding(this); 4640 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4641 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4642 } else { 4643 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4644 } 4645 %} 4646 ins_pipe( fpu_reg_reg ); 4647 %} 4648 4649 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4650 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4651 match(Set dst (Replicate con)); 4652 format %{ "vallones $dst" %} 4653 ins_encode %{ 4654 int vector_len = vector_length_encoding(this); 4655 __ vallones($dst$$XMMRegister, vector_len); 4656 %} 4657 ins_pipe( pipe_slow ); 4658 %} 4659 4660 // ====================ReplicateL======================================= 4661 4662 #ifdef _LP64 4663 // Replicate long (8 byte) scalar to be vector 4664 instruct ReplL_reg(vec dst, rRegL src) %{ 4665 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4666 match(Set dst (Replicate src)); 4667 format %{ "replicateL $dst,$src" %} 4668 ins_encode %{ 4669 int vlen = Matcher::vector_length(this); 4670 int vlen_enc = vector_length_encoding(this); 4671 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4672 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4673 } else if (VM_Version::supports_avx2()) { 4674 __ movdq($dst$$XMMRegister, $src$$Register); 4675 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4676 } else { 4677 __ movdq($dst$$XMMRegister, $src$$Register); 4678 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4679 } 4680 %} 4681 ins_pipe( pipe_slow ); 4682 %} 4683 #else // _LP64 4684 // Replicate long (8 byte) scalar to be vector 4685 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4686 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4687 match(Set dst (Replicate src)); 4688 effect(TEMP dst, USE src, TEMP tmp); 4689 format %{ "replicateL $dst,$src" %} 4690 ins_encode %{ 4691 uint vlen = Matcher::vector_length(this); 4692 if (vlen == 2) { 4693 __ movdl($dst$$XMMRegister, $src$$Register); 4694 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4695 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4696 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4697 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4698 int vlen_enc = Assembler::AVX_256bit; 4699 __ movdl($dst$$XMMRegister, $src$$Register); 4700 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4701 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4702 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4703 } else { 4704 __ movdl($dst$$XMMRegister, $src$$Register); 4705 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4706 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4707 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4708 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 
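      // (The two 32-bit halves of the long are packed into one 64-bit lane, duplicated
      //  across the low 128 bits, and then mirrored into the high 128-bit lane.)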
4709 } 4710 %} 4711 ins_pipe( pipe_slow ); 4712 %} 4713 4714 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4715 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4716 match(Set dst (Replicate src)); 4717 effect(TEMP dst, USE src, TEMP tmp); 4718 format %{ "replicateL $dst,$src" %} 4719 ins_encode %{ 4720 if (VM_Version::supports_avx512vl()) { 4721 __ movdl($dst$$XMMRegister, $src$$Register); 4722 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4723 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4724 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4725 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4726 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4727 } else { 4728 int vlen_enc = Assembler::AVX_512bit; 4729 __ movdl($dst$$XMMRegister, $src$$Register); 4730 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4731 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4732 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4733 } 4734 %} 4735 ins_pipe( pipe_slow ); 4736 %} 4737 #endif // _LP64 4738 4739 instruct ReplL_mem(vec dst, memory mem) %{ 4740 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4741 match(Set dst (Replicate (LoadL mem))); 4742 format %{ "replicateL $dst,$mem" %} 4743 ins_encode %{ 4744 int vlen_enc = vector_length_encoding(this); 4745 if (VM_Version::supports_avx2()) { 4746 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4747 } else if (VM_Version::supports_sse3()) { 4748 __ movddup($dst$$XMMRegister, $mem$$Address); 4749 } else { 4750 __ movq($dst$$XMMRegister, $mem$$Address); 4751 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4752 } 4753 %} 4754 ins_pipe( pipe_slow ); 4755 %} 4756 4757 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4758 instruct ReplL_imm(vec dst, immL con) %{ 4759 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4760 match(Set dst (Replicate con)); 4761 format %{ "replicateL $dst,$con" %} 4762 ins_encode %{ 4763 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4764 int vlen = Matcher::vector_length_in_bytes(this); 4765 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 4770 instruct ReplL_zero(vec dst, immL0 zero) %{ 4771 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4772 match(Set dst (Replicate zero)); 4773 format %{ "replicateL $dst,$zero" %} 4774 ins_encode %{ 4775 int vlen_enc = vector_length_encoding(this); 4776 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4777 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4778 } else { 4779 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4780 } 4781 %} 4782 ins_pipe( fpu_reg_reg ); 4783 %} 4784 4785 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4786 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4787 match(Set dst (Replicate con)); 4788 format %{ "vallones $dst" %} 4789 ins_encode %{ 4790 int vector_len = vector_length_encoding(this); 4791 __ vallones($dst$$XMMRegister, vector_len); 4792 %} 4793 ins_pipe( pipe_slow ); 4794 %} 4795 4796 // ====================ReplicateF======================================= 4797 4798 instruct vReplF_reg(vec dst, vlRegF src) %{ 4799 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4800 match(Set dst (Replicate src)); 4801 format %{ "replicateF $dst,$src" %} 4802 ins_encode %{ 4803 uint vlen = Matcher::vector_length(this); 4804 int vlen_enc = vector_length_encoding(this); 4805 if (vlen <= 4) { 4806 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4807 } else if (VM_Version::supports_avx2()) { 4808 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4809 } else { 4810 assert(vlen == 8, "sanity"); 4811 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4812 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4813 } 4814 %} 4815 ins_pipe( pipe_slow ); 4816 %} 4817 4818 instruct ReplF_reg(vec dst, vlRegF src) %{ 4819 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4820 match(Set dst (Replicate src)); 4821 format %{ "replicateF $dst,$src" %} 4822 ins_encode %{ 4823 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4824 %} 4825 ins_pipe( pipe_slow ); 4826 %} 4827 4828 instruct ReplF_mem(vec dst, memory mem) %{ 4829 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4830 match(Set dst (Replicate (LoadF mem))); 4831 format %{ "replicateF $dst,$mem" %} 4832 ins_encode %{ 4833 int vlen_enc = vector_length_encoding(this); 4834 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4835 %} 4836 ins_pipe( pipe_slow ); 4837 %} 4838 4839 // Replicate float scalar immediate to be vector by loading from const table. 4840 instruct ReplF_imm(vec dst, immF con) %{ 4841 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4842 match(Set dst (Replicate con)); 4843 format %{ "replicateF $dst,$con" %} 4844 ins_encode %{ 4845 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4846 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4847 int vlen = Matcher::vector_length_in_bytes(this); 4848 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct ReplF_zero(vec dst, immF0 zero) %{ 4854 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4855 match(Set dst (Replicate zero)); 4856 format %{ "replicateF $dst,$zero" %} 4857 ins_encode %{ 4858 int vlen_enc = vector_length_encoding(this); 4859 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4860 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4861 } else { 4862 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4863 } 4864 %} 4865 ins_pipe( fpu_reg_reg ); 4866 %} 4867 4868 // ====================ReplicateD======================================= 4869 4870 // Replicate double (8 bytes) scalar to be vector 4871 instruct vReplD_reg(vec dst, vlRegD src) %{ 4872 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4873 match(Set dst (Replicate src)); 4874 format %{ "replicateD $dst,$src" %} 4875 ins_encode %{ 4876 uint vlen = Matcher::vector_length(this); 4877 int vlen_enc = vector_length_encoding(this); 4878 if (vlen <= 2) { 4879 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4880 } else if (VM_Version::supports_avx2()) { 4881 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4882 } else { 4883 assert(vlen == 4, "sanity"); 4884 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4885 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4886 } 4887 %} 4888 ins_pipe( pipe_slow ); 4889 %} 4890 4891 instruct ReplD_reg(vec dst, vlRegD src) %{ 4892 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4893 match(Set dst (Replicate src)); 4894 format %{ "replicateD $dst,$src" %} 4895 ins_encode %{ 4896 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4897 %} 4898 ins_pipe( pipe_slow ); 4899 %} 4900 4901 instruct ReplD_mem(vec dst, memory mem) %{ 4902 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4903 match(Set dst (Replicate (LoadD mem))); 4904 format %{ "replicateD $dst,$mem" %} 4905 ins_encode %{ 4906 if (Matcher::vector_length(this) >= 4) { 4907 int vlen_enc = vector_length_encoding(this); 4908 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4909 } else { 4910 __ movddup($dst$$XMMRegister, $mem$$Address); 4911 } 4912 %} 4913 ins_pipe( pipe_slow ); 4914 %} 4915 4916 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4917 instruct ReplD_imm(vec dst, immD con) %{ 4918 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4919 match(Set dst (Replicate con)); 4920 format %{ "replicateD $dst,$con" %} 4921 ins_encode %{ 4922 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4923 int vlen = Matcher::vector_length_in_bytes(this); 4924 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4925 %} 4926 ins_pipe( pipe_slow ); 4927 %} 4928 4929 instruct ReplD_zero(vec dst, immD0 zero) %{ 4930 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4931 match(Set dst (Replicate zero)); 4932 format %{ "replicateD $dst,$zero" %} 4933 ins_encode %{ 4934 int vlen_enc = vector_length_encoding(this); 4935 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4936 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4937 } else { 4938 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4939 } 4940 %} 4941 ins_pipe( fpu_reg_reg ); 4942 %} 4943 4944 // ====================VECTOR INSERT======================================= 4945 4946 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4947 predicate(Matcher::vector_length_in_bytes(n) < 32); 4948 match(Set dst (VectorInsert (Binary dst val) idx)); 4949 format %{ "vector_insert $dst,$val,$idx" %} 4950 ins_encode %{ 4951 assert(UseSSE >= 4, "required"); 4952 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4953 4954 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4955 4956 assert(is_integral_type(elem_bt), ""); 4957 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4958 4959 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4960 %} 4961 ins_pipe( pipe_slow ); 4962 %} 4963 4964 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4965 predicate(Matcher::vector_length_in_bytes(n) == 32); 4966 match(Set dst (VectorInsert (Binary src val) idx)); 4967 effect(TEMP vtmp); 4968 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4969 ins_encode %{ 4970 int vlen_enc = Assembler::AVX_256bit; 4971 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4972 int elem_per_lane = 16/type2aelembytes(elem_bt); 4973 int log2epr = log2(elem_per_lane); 4974 4975 assert(is_integral_type(elem_bt), "sanity"); 4976 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4977 4978 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4979 uint y_idx = ($idx$$constant >> log2epr) & 1; 4980 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4981 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4982 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4983 %} 4984 ins_pipe( pipe_slow ); 4985 %} 4986 4987 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4988 predicate(Matcher::vector_length_in_bytes(n) == 64); 4989 match(Set dst (VectorInsert (Binary src val) idx)); 4990 effect(TEMP vtmp); 4991 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4992 ins_encode %{ 4993 assert(UseAVX > 2, "sanity"); 4994 4995 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4996 int elem_per_lane = 16/type2aelembytes(elem_bt); 4997 int log2epr = log2(elem_per_lane); 4998 4999 assert(is_integral_type(elem_bt), ""); 5000 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5001 5002 uint x_idx = $idx$$constant & right_n_bits(log2epr); 5003 uint y_idx = ($idx$$constant >> log2epr) & 3; 5004 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5005 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5006 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 #ifdef _LP64 5012 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 5013 predicate(Matcher::vector_length(n) == 2); 5014 match(Set dst (VectorInsert (Binary dst val) idx)); 5015 format %{ "vector_insert $dst,$val,$idx" %} 5016 ins_encode %{ 5017 assert(UseSSE >= 4, "required"); 5018 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 5019 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5020 5021 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 5022 %} 5023 ins_pipe( pipe_slow ); 5024 %} 5025 5026 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 5027 predicate(Matcher::vector_length(n) == 4); 5028 match(Set dst (VectorInsert (Binary src val) idx)); 5029 effect(TEMP vtmp); 5030 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5031 ins_encode %{ 5032 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 5033 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5034 5035 uint x_idx = $idx$$constant & right_n_bits(1); 5036 uint y_idx = ($idx$$constant >> 1) & 1; 5037 int vlen_enc = Assembler::AVX_256bit; 5038 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5039 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5040 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5041 %} 5042 ins_pipe( pipe_slow ); 5043 %} 5044 5045 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 5046 predicate(Matcher::vector_length(n) == 8); 5047 match(Set dst (VectorInsert (Binary src val) idx)); 5048 effect(TEMP vtmp); 5049 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5050 ins_encode %{ 5051 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 5052 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5053 5054 uint x_idx = $idx$$constant & right_n_bits(1); 5055 uint y_idx = ($idx$$constant >> 1) & 3; 5056 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5057 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5058 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5059 %} 5060 ins_pipe( pipe_slow ); 5061 %} 5062 #endif 5063 5064 instruct insertF(vec dst, regF val, immU8 idx) %{ 5065 predicate(Matcher::vector_length(n) < 8); 5066 match(Set dst (VectorInsert (Binary dst val) idx)); 5067 format %{ "vector_insert $dst,$val,$idx" %} 5068 ins_encode %{ 5069 assert(UseSSE >= 4, "sanity"); 5070 5071 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5072 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5073 5074 uint x_idx = $idx$$constant & right_n_bits(2); 5075 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5076 %} 5077 ins_pipe( pipe_slow ); 5078 %} 5079 5080 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 5081 predicate(Matcher::vector_length(n) >= 8); 5082 match(Set dst (VectorInsert (Binary src val) idx)); 5083 effect(TEMP vtmp); 5084 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5085 ins_encode %{ 5086 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5087 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5088 5089 int vlen = Matcher::vector_length(this); 5090 uint x_idx = $idx$$constant & right_n_bits(2); 5091 if (vlen == 8) { 5092 uint y_idx = 
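      // (Each 128-bit lane holds four floats: x_idx (idx & 3) is the slot within a lane,
      //  while the bits above select which lane is extracted, patched and reinserted.)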
($idx$$constant >> 2) & 1; 5093 int vlen_enc = Assembler::AVX_256bit; 5094 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5095 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5096 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5097 } else { 5098 assert(vlen == 16, "sanity"); 5099 uint y_idx = ($idx$$constant >> 2) & 3; 5100 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5101 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5102 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5103 } 5104 %} 5105 ins_pipe( pipe_slow ); 5106 %} 5107 5108 #ifdef _LP64 5109 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5110 predicate(Matcher::vector_length(n) == 2); 5111 match(Set dst (VectorInsert (Binary dst val) idx)); 5112 effect(TEMP tmp); 5113 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5114 ins_encode %{ 5115 assert(UseSSE >= 4, "sanity"); 5116 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5117 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5118 5119 __ movq($tmp$$Register, $val$$XMMRegister); 5120 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5121 %} 5122 ins_pipe( pipe_slow ); 5123 %} 5124 5125 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5126 predicate(Matcher::vector_length(n) == 4); 5127 match(Set dst (VectorInsert (Binary src val) idx)); 5128 effect(TEMP vtmp, TEMP tmp); 5129 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5130 ins_encode %{ 5131 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5132 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5133 5134 uint x_idx = $idx$$constant & right_n_bits(1); 5135 uint y_idx = ($idx$$constant >> 1) & 1; 5136 int vlen_enc = Assembler::AVX_256bit; 5137 __ movq($tmp$$Register, $val$$XMMRegister); 5138 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5139 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5140 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5141 %} 5142 ins_pipe( pipe_slow ); 5143 %} 5144 5145 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5146 predicate(Matcher::vector_length(n) == 8); 5147 match(Set dst (VectorInsert (Binary src val) idx)); 5148 effect(TEMP tmp, TEMP vtmp); 5149 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5150 ins_encode %{ 5151 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5152 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5153 5154 uint x_idx = $idx$$constant & right_n_bits(1); 5155 uint y_idx = ($idx$$constant >> 1) & 3; 5156 __ movq($tmp$$Register, $val$$XMMRegister); 5157 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5158 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5159 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5160 %} 5161 ins_pipe( pipe_slow ); 5162 %} 5163 #endif 5164 5165 // ====================REDUCTION ARITHMETIC======================================= 5166 5167 // =======================Int Reduction========================================== 5168 5169 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5170 
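  // Folds all lanes of the int vector src2 with the node's operation (add, mul, and, or,
  // xor, min or max) and combines the result with the scalar input src1.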
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
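
// Strictly ordered floating-point reductions (e.g. those created by the auto-vectorizer)
// must accumulate the lanes in their original order, because FP add/mul is not associative
// under rounding: in float arithmetic (1.0f + 1e8f) + (-1e8f) is 0.0f, while
// 1.0f + (1e8f + (-1e8f)) is 1.0f. The unordered_reduction rules further below may
// reassociate lanes and are used only when strict order is not required.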
5246 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5247 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5248 match(Set dst (AddReductionVF dst src)); 5249 match(Set dst (MulReductionVF dst src)); 5250 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5251 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5252 ins_encode %{ 5253 int opcode = this->ideal_Opcode(); 5254 int vlen = Matcher::vector_length(this, $src); 5255 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5256 %} 5257 ins_pipe( pipe_slow ); 5258 %} 5259 5260 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5261 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5262 match(Set dst (AddReductionVF dst src)); 5263 match(Set dst (MulReductionVF dst src)); 5264 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5265 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5266 ins_encode %{ 5267 int opcode = this->ideal_Opcode(); 5268 int vlen = Matcher::vector_length(this, $src); 5269 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5270 %} 5271 ins_pipe( pipe_slow ); 5272 %} 5273 5274 5275 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5276 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5277 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5278 // src1 contains reduction identity 5279 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5280 match(Set dst (AddReductionVF src1 src2)); 5281 match(Set dst (MulReductionVF src1 src2)); 5282 effect(TEMP dst); 5283 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5284 ins_encode %{ 5285 int opcode = this->ideal_Opcode(); 5286 int vlen = Matcher::vector_length(this, $src2); 5287 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5288 %} 5289 ins_pipe( pipe_slow ); 5290 %} 5291 5292 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5293 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5294 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5295 // src1 contains reduction identity 5296 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5297 match(Set dst (AddReductionVF src1 src2)); 5298 match(Set dst (MulReductionVF src1 src2)); 5299 effect(TEMP dst, TEMP vtmp); 5300 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5301 ins_encode %{ 5302 int opcode = this->ideal_Opcode(); 5303 int vlen = Matcher::vector_length(this, $src2); 5304 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5305 %} 5306 ins_pipe( pipe_slow ); 5307 %} 5308 5309 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5310 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5311 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
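// (e.g. FloatVector.reduceLanes(VectorOperators.ADD), which does not mandate a particular
// combination order for floating-point add/mul)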
5312 // src1 contains reduction identity 5313 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5314 match(Set dst (AddReductionVF src1 src2)); 5315 match(Set dst (MulReductionVF src1 src2)); 5316 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5317 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5318 ins_encode %{ 5319 int opcode = this->ideal_Opcode(); 5320 int vlen = Matcher::vector_length(this, $src2); 5321 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5322 %} 5323 ins_pipe( pipe_slow ); 5324 %} 5325 5326 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5327 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5328 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5329 // src1 contains reduction identity 5330 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5331 match(Set dst (AddReductionVF src1 src2)); 5332 match(Set dst (MulReductionVF src1 src2)); 5333 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5334 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5335 ins_encode %{ 5336 int opcode = this->ideal_Opcode(); 5337 int vlen = Matcher::vector_length(this, $src2); 5338 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5339 %} 5340 ins_pipe( pipe_slow ); 5341 %} 5342 5343 // =======================Double Reduction========================================== 5344 5345 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5346 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5347 match(Set dst (AddReductionVD dst src)); 5348 match(Set dst (MulReductionVD dst src)); 5349 effect(TEMP dst, TEMP vtmp); 5350 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5351 ins_encode %{ 5352 int opcode = this->ideal_Opcode(); 5353 int vlen = Matcher::vector_length(this, $src); 5354 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5355 %} 5356 ins_pipe( pipe_slow ); 5357 %} 5358 5359 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5360 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5361 match(Set dst (AddReductionVD dst src)); 5362 match(Set dst (MulReductionVD dst src)); 5363 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5364 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5365 ins_encode %{ 5366 int opcode = this->ideal_Opcode(); 5367 int vlen = Matcher::vector_length(this, $src); 5368 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5369 %} 5370 ins_pipe( pipe_slow ); 5371 %} 5372 5373 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5374 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5375 match(Set dst (AddReductionVD dst src)); 5376 match(Set dst (MulReductionVD dst src)); 5377 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5378 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5379 ins_encode %{ 5380 int opcode = this->ideal_Opcode(); 5381 int vlen = Matcher::vector_length(this, $src); 5382 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5383 %} 5384 ins_pipe( pipe_slow ); 5385 %} 5386 5387 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5388 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5389 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5390 // src1 contains reduction identity 5391 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5392 match(Set dst (AddReductionVD src1 src2)); 5393 match(Set dst (MulReductionVD src1 src2)); 5394 effect(TEMP dst); 5395 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5396 ins_encode %{ 5397 int opcode = this->ideal_Opcode(); 5398 int vlen = Matcher::vector_length(this, $src2); 5399 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5405 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5406 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5407 // src1 contains reduction identity 5408 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5409 match(Set dst (AddReductionVD src1 src2)); 5410 match(Set dst (MulReductionVD src1 src2)); 5411 effect(TEMP dst, TEMP vtmp); 5412 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5413 ins_encode %{ 5414 int opcode = this->ideal_Opcode(); 5415 int vlen = Matcher::vector_length(this, $src2); 5416 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5417 %} 5418 ins_pipe( pipe_slow ); 5419 %} 5420 5421 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5422 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5423 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5424 // src1 contains reduction identity 5425 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5426 match(Set dst (AddReductionVD src1 src2)); 5427 match(Set dst (MulReductionVD src1 src2)); 5428 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5429 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5430 ins_encode %{ 5431 int opcode = this->ideal_Opcode(); 5432 int vlen = Matcher::vector_length(this, $src2); 5433 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5434 %} 5435 ins_pipe( pipe_slow ); 5436 %} 5437 5438 // =======================Byte Reduction========================================== 5439 5440 #ifdef _LP64 5441 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5442 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5443 match(Set dst (AddReductionVI src1 src2)); 5444 match(Set dst (AndReductionV src1 src2)); 5445 match(Set dst ( OrReductionV src1 src2)); 5446 match(Set dst (XorReductionV src1 src2)); 5447 match(Set dst (MinReductionV src1 src2)); 5448 match(Set dst (MaxReductionV src1 src2)); 5449 effect(TEMP vtmp1, TEMP vtmp2); 5450 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5451 ins_encode %{ 5452 int opcode = this->ideal_Opcode(); 5453 int vlen = Matcher::vector_length(this, $src2); 5454 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5455 %} 5456 ins_pipe( pipe_slow ); 5457 %} 5458 5459 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5460 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5461 match(Set dst (AddReductionVI src1 src2)); 5462 match(Set dst (AndReductionV src1 src2)); 5463 match(Set dst ( OrReductionV src1 src2)); 5464 match(Set dst (XorReductionV src1 src2)); 5465 match(Set dst (MinReductionV src1 src2)); 5466 match(Set dst (MaxReductionV src1 src2)); 5467 effect(TEMP vtmp1, TEMP vtmp2); 5468 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5469 ins_encode %{ 5470 int opcode = this->ideal_Opcode(); 5471 int vlen = Matcher::vector_length(this, $src2); 5472 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5473 %} 5474 ins_pipe( pipe_slow ); 5475 %} 5476 #endif 5477 5478 // =======================Short Reduction========================================== 5479 5480 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5481 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5482 match(Set dst (AddReductionVI src1 src2)); 5483 match(Set dst (MulReductionVI src1 src2)); 5484 match(Set dst (AndReductionV src1 src2)); 5485 match(Set dst ( OrReductionV src1 src2)); 5486 match(Set dst (XorReductionV src1 src2)); 5487 match(Set dst (MinReductionV src1 src2)); 5488 match(Set dst (MaxReductionV src1 src2)); 5489 effect(TEMP vtmp1, TEMP vtmp2); 5490 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5491 ins_encode %{ 5492 int opcode = this->ideal_Opcode(); 5493 int vlen = Matcher::vector_length(this, $src2); 5494 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 
5495 %} 5496 ins_pipe( pipe_slow ); 5497 %} 5498 5499 // =======================Mul Reduction========================================== 5500 5501 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5502 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5503 Matcher::vector_length(n->in(2)) <= 32); // src2 5504 match(Set dst (MulReductionVI src1 src2)); 5505 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5506 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5507 ins_encode %{ 5508 int opcode = this->ideal_Opcode(); 5509 int vlen = Matcher::vector_length(this, $src2); 5510 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5511 %} 5512 ins_pipe( pipe_slow ); 5513 %} 5514 5515 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5516 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5517 Matcher::vector_length(n->in(2)) == 64); // src2 5518 match(Set dst (MulReductionVI src1 src2)); 5519 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5520 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5521 ins_encode %{ 5522 int opcode = this->ideal_Opcode(); 5523 int vlen = Matcher::vector_length(this, $src2); 5524 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5525 %} 5526 ins_pipe( pipe_slow ); 5527 %} 5528 5529 //--------------------Min/Max Float Reduction -------------------- 5530 // Float Min Reduction 5531 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5532 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5533 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5534 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5535 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5536 Matcher::vector_length(n->in(2)) == 2); 5537 match(Set dst (MinReductionV src1 src2)); 5538 match(Set dst (MaxReductionV src1 src2)); 5539 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5540 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5541 ins_encode %{ 5542 assert(UseAVX > 0, "sanity"); 5543 5544 int opcode = this->ideal_Opcode(); 5545 int vlen = Matcher::vector_length(this, $src2); 5546 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5547 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5548 %} 5549 ins_pipe( pipe_slow ); 5550 %} 5551 5552 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5553 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5554 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5555 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5556 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5557 Matcher::vector_length(n->in(2)) >= 4); 5558 match(Set dst (MinReductionV src1 src2)); 5559 match(Set dst (MaxReductionV src1 src2)); 5560 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5561 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5562 ins_encode %{ 5563 assert(UseAVX > 0, "sanity"); 5564 5565 int opcode = 
this->ideal_Opcode(); 5566 int vlen = Matcher::vector_length(this, $src2); 5567 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5568 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5569 %} 5570 ins_pipe( pipe_slow ); 5571 %} 5572 5573 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5574 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5575 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5576 Matcher::vector_length(n->in(2)) == 2); 5577 match(Set dst (MinReductionV dst src)); 5578 match(Set dst (MaxReductionV dst src)); 5579 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5580 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5581 ins_encode %{ 5582 assert(UseAVX > 0, "sanity"); 5583 5584 int opcode = this->ideal_Opcode(); 5585 int vlen = Matcher::vector_length(this, $src); 5586 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5587 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5588 %} 5589 ins_pipe( pipe_slow ); 5590 %} 5591 5592 5593 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5594 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5595 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5596 Matcher::vector_length(n->in(2)) >= 4); 5597 match(Set dst (MinReductionV dst src)); 5598 match(Set dst (MaxReductionV dst src)); 5599 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5600 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5601 ins_encode %{ 5602 assert(UseAVX > 0, "sanity"); 5603 5604 int opcode = this->ideal_Opcode(); 5605 int vlen = Matcher::vector_length(this, $src); 5606 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5607 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5608 %} 5609 ins_pipe( pipe_slow ); 5610 %} 5611 5612 5613 //--------------------Min Double Reduction -------------------- 5614 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5615 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5616 rFlagsReg cr) %{ 5617 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5618 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5619 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5620 Matcher::vector_length(n->in(2)) == 2); 5621 match(Set dst (MinReductionV src1 src2)); 5622 match(Set dst (MaxReductionV src1 src2)); 5623 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5624 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5625 ins_encode %{ 5626 assert(UseAVX > 0, "sanity"); 5627 5628 int opcode = this->ideal_Opcode(); 5629 int vlen = Matcher::vector_length(this, $src2); 5630 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5631 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5637 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5638 rFlagsReg cr) %{ 5639 
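  // This variant only matches when the scalar input src1 is the identity of the operation
  // (+Inf for min, -Inf for max, checked by the predicate below), so the result can be
  // computed from the vector lanes alone.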
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5640 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5641 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5642 Matcher::vector_length(n->in(2)) >= 4); 5643 match(Set dst (MinReductionV src1 src2)); 5644 match(Set dst (MaxReductionV src1 src2)); 5645 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5646 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5647 ins_encode %{ 5648 assert(UseAVX > 0, "sanity"); 5649 5650 int opcode = this->ideal_Opcode(); 5651 int vlen = Matcher::vector_length(this, $src2); 5652 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5653 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5654 %} 5655 ins_pipe( pipe_slow ); 5656 %} 5657 5658 5659 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5660 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5661 rFlagsReg cr) %{ 5662 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5663 Matcher::vector_length(n->in(2)) == 2); 5664 match(Set dst (MinReductionV dst src)); 5665 match(Set dst (MaxReductionV dst src)); 5666 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5667 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5668 ins_encode %{ 5669 assert(UseAVX > 0, "sanity"); 5670 5671 int opcode = this->ideal_Opcode(); 5672 int vlen = Matcher::vector_length(this, $src); 5673 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5674 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5675 %} 5676 ins_pipe( pipe_slow ); 5677 %} 5678 5679 instruct minmax_reductionD_av(legRegD dst, legVec src, 5680 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5681 rFlagsReg cr) %{ 5682 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5683 Matcher::vector_length(n->in(2)) >= 4); 5684 match(Set dst (MinReductionV dst src)); 5685 match(Set dst (MaxReductionV dst src)); 5686 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5687 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5688 ins_encode %{ 5689 assert(UseAVX > 0, "sanity"); 5690 5691 int opcode = this->ideal_Opcode(); 5692 int vlen = Matcher::vector_length(this, $src); 5693 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5694 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5695 %} 5696 ins_pipe( pipe_slow ); 5697 %} 5698 5699 // ====================VECTOR ARITHMETIC======================================= 5700 5701 // --------------------------------- ADD -------------------------------------- 5702 5703 // Bytes vector add 5704 instruct vaddB(vec dst, vec src) %{ 5705 predicate(UseAVX == 0); 5706 match(Set dst (AddVB dst src)); 5707 format %{ "paddb $dst,$src\t! add packedB" %} 5708 ins_encode %{ 5709 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5710 %} 5711 ins_pipe( pipe_slow ); 5712 %} 5713 5714 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5715 predicate(UseAVX > 0); 5716 match(Set dst (AddVB src1 src2)); 5717 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5718 ins_encode %{ 5719 int vlen_enc = vector_length_encoding(this); 5720 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5721 %} 5722 ins_pipe( pipe_slow ); 5723 %} 5724 5725 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5726 predicate((UseAVX > 0) && 5727 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5728 match(Set dst (AddVB src (LoadVector mem))); 5729 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5730 ins_encode %{ 5731 int vlen_enc = vector_length_encoding(this); 5732 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5733 %} 5734 ins_pipe( pipe_slow ); 5735 %} 5736 5737 // Shorts/Chars vector add 5738 instruct vaddS(vec dst, vec src) %{ 5739 predicate(UseAVX == 0); 5740 match(Set dst (AddVS dst src)); 5741 format %{ "paddw $dst,$src\t! add packedS" %} 5742 ins_encode %{ 5743 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5744 %} 5745 ins_pipe( pipe_slow ); 5746 %} 5747 5748 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5749 predicate(UseAVX > 0); 5750 match(Set dst (AddVS src1 src2)); 5751 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5752 ins_encode %{ 5753 int vlen_enc = vector_length_encoding(this); 5754 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5755 %} 5756 ins_pipe( pipe_slow ); 5757 %} 5758 5759 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5760 predicate((UseAVX > 0) && 5761 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5762 match(Set dst (AddVS src (LoadVector mem))); 5763 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5764 ins_encode %{ 5765 int vlen_enc = vector_length_encoding(this); 5766 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5767 %} 5768 ins_pipe( pipe_slow ); 5769 %} 5770 5771 // Integers vector add 5772 instruct vaddI(vec dst, vec src) %{ 5773 predicate(UseAVX == 0); 5774 match(Set dst (AddVI dst src)); 5775 format %{ "paddd $dst,$src\t! add packedI" %} 5776 ins_encode %{ 5777 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5778 %} 5779 ins_pipe( pipe_slow ); 5780 %} 5781 5782 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5783 predicate(UseAVX > 0); 5784 match(Set dst (AddVI src1 src2)); 5785 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5786 ins_encode %{ 5787 int vlen_enc = vector_length_encoding(this); 5788 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5789 %} 5790 ins_pipe( pipe_slow ); 5791 %} 5792 5793 5794 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5795 predicate((UseAVX > 0) && 5796 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5797 match(Set dst (AddVI src (LoadVector mem))); 5798 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5799 ins_encode %{ 5800 int vlen_enc = vector_length_encoding(this); 5801 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 // Longs vector add 5807 instruct vaddL(vec dst, vec src) %{ 5808 predicate(UseAVX == 0); 5809 match(Set dst (AddVL dst src)); 5810 format %{ "paddq $dst,$src\t! add packedL" %} 5811 ins_encode %{ 5812 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5818 predicate(UseAVX > 0); 5819 match(Set dst (AddVL src1 src2)); 5820 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5821 ins_encode %{ 5822 int vlen_enc = vector_length_encoding(this); 5823 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5824 %} 5825 ins_pipe( pipe_slow ); 5826 %} 5827 5828 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5829 predicate((UseAVX > 0) && 5830 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5831 match(Set dst (AddVL src (LoadVector mem))); 5832 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5833 ins_encode %{ 5834 int vlen_enc = vector_length_encoding(this); 5835 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5836 %} 5837 ins_pipe( pipe_slow ); 5838 %} 5839 5840 // Floats vector add 5841 instruct vaddF(vec dst, vec src) %{ 5842 predicate(UseAVX == 0); 5843 match(Set dst (AddVF dst src)); 5844 format %{ "addps $dst,$src\t! add packedF" %} 5845 ins_encode %{ 5846 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5847 %} 5848 ins_pipe( pipe_slow ); 5849 %} 5850 5851 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5852 predicate(UseAVX > 0); 5853 match(Set dst (AddVF src1 src2)); 5854 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5855 ins_encode %{ 5856 int vlen_enc = vector_length_encoding(this); 5857 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5858 %} 5859 ins_pipe( pipe_slow ); 5860 %} 5861 5862 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5863 predicate((UseAVX > 0) && 5864 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5865 match(Set dst (AddVF src (LoadVector mem))); 5866 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5867 ins_encode %{ 5868 int vlen_enc = vector_length_encoding(this); 5869 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5870 %} 5871 ins_pipe( pipe_slow ); 5872 %} 5873 5874 // Doubles vector add 5875 instruct vaddD(vec dst, vec src) %{ 5876 predicate(UseAVX == 0); 5877 match(Set dst (AddVD dst src)); 5878 format %{ "addpd $dst,$src\t! add packedD" %} 5879 ins_encode %{ 5880 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5881 %} 5882 ins_pipe( pipe_slow ); 5883 %} 5884 5885 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5886 predicate(UseAVX > 0); 5887 match(Set dst (AddVD src1 src2)); 5888 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5889 ins_encode %{ 5890 int vlen_enc = vector_length_encoding(this); 5891 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5892 %} 5893 ins_pipe( pipe_slow ); 5894 %} 5895 5896 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5897 predicate((UseAVX > 0) && 5898 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5899 match(Set dst (AddVD src (LoadVector mem))); 5900 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5901 ins_encode %{ 5902 int vlen_enc = vector_length_encoding(this); 5903 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5904 %} 5905 ins_pipe( pipe_slow ); 5906 %} 5907 5908 // --------------------------------- SUB -------------------------------------- 5909 5910 // Bytes vector sub 5911 instruct vsubB(vec dst, vec src) %{ 5912 predicate(UseAVX == 0); 5913 match(Set dst (SubVB dst src)); 5914 format %{ "psubb $dst,$src\t! sub packedB" %} 5915 ins_encode %{ 5916 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5917 %} 5918 ins_pipe( pipe_slow ); 5919 %} 5920 5921 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5922 predicate(UseAVX > 0); 5923 match(Set dst (SubVB src1 src2)); 5924 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5925 ins_encode %{ 5926 int vlen_enc = vector_length_encoding(this); 5927 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5928 %} 5929 ins_pipe( pipe_slow ); 5930 %} 5931 5932 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5933 predicate((UseAVX > 0) && 5934 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5935 match(Set dst (SubVB src (LoadVector mem))); 5936 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5937 ins_encode %{ 5938 int vlen_enc = vector_length_encoding(this); 5939 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5940 %} 5941 ins_pipe( pipe_slow ); 5942 %} 5943 5944 // Shorts/Chars vector sub 5945 instruct vsubS(vec dst, vec src) %{ 5946 predicate(UseAVX == 0); 5947 match(Set dst (SubVS dst src)); 5948 format %{ "psubw $dst,$src\t! sub packedS" %} 5949 ins_encode %{ 5950 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5951 %} 5952 ins_pipe( pipe_slow ); 5953 %} 5954 5955 5956 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5957 predicate(UseAVX > 0); 5958 match(Set dst (SubVS src1 src2)); 5959 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5960 ins_encode %{ 5961 int vlen_enc = vector_length_encoding(this); 5962 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5968 predicate((UseAVX > 0) && 5969 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5970 match(Set dst (SubVS src (LoadVector mem))); 5971 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5972 ins_encode %{ 5973 int vlen_enc = vector_length_encoding(this); 5974 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5975 %} 5976 ins_pipe( pipe_slow ); 5977 %} 5978 5979 // Integers vector sub 5980 instruct vsubI(vec dst, vec src) %{ 5981 predicate(UseAVX == 0); 5982 match(Set dst (SubVI dst src)); 5983 format %{ "psubd $dst,$src\t! sub packedI" %} 5984 ins_encode %{ 5985 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5986 %} 5987 ins_pipe( pipe_slow ); 5988 %} 5989 5990 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5991 predicate(UseAVX > 0); 5992 match(Set dst (SubVI src1 src2)); 5993 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5994 ins_encode %{ 5995 int vlen_enc = vector_length_encoding(this); 5996 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5997 %} 5998 ins_pipe( pipe_slow ); 5999 %} 6000 6001 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 6002 predicate((UseAVX > 0) && 6003 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6004 match(Set dst (SubVI src (LoadVector mem))); 6005 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 6006 ins_encode %{ 6007 int vlen_enc = vector_length_encoding(this); 6008 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 // Longs vector sub 6014 instruct vsubL(vec dst, vec src) %{ 6015 predicate(UseAVX == 0); 6016 match(Set dst (SubVL dst src)); 6017 format %{ "psubq $dst,$src\t! sub packedL" %} 6018 ins_encode %{ 6019 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6020 %} 6021 ins_pipe( pipe_slow ); 6022 %} 6023 6024 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 6025 predicate(UseAVX > 0); 6026 match(Set dst (SubVL src1 src2)); 6027 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 6028 ins_encode %{ 6029 int vlen_enc = vector_length_encoding(this); 6030 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6031 %} 6032 ins_pipe( pipe_slow ); 6033 %} 6034 6035 6036 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 6037 predicate((UseAVX > 0) && 6038 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6039 match(Set dst (SubVL src (LoadVector mem))); 6040 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 6041 ins_encode %{ 6042 int vlen_enc = vector_length_encoding(this); 6043 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6044 %} 6045 ins_pipe( pipe_slow ); 6046 %} 6047 6048 // Floats vector sub 6049 instruct vsubF(vec dst, vec src) %{ 6050 predicate(UseAVX == 0); 6051 match(Set dst (SubVF dst src)); 6052 format %{ "subps $dst,$src\t! sub packedF" %} 6053 ins_encode %{ 6054 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6055 %} 6056 ins_pipe( pipe_slow ); 6057 %} 6058 6059 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 6060 predicate(UseAVX > 0); 6061 match(Set dst (SubVF src1 src2)); 6062 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 6063 ins_encode %{ 6064 int vlen_enc = vector_length_encoding(this); 6065 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6066 %} 6067 ins_pipe( pipe_slow ); 6068 %} 6069 6070 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6071 predicate((UseAVX > 0) && 6072 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6073 match(Set dst (SubVF src (LoadVector mem))); 6074 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6075 ins_encode %{ 6076 int vlen_enc = vector_length_encoding(this); 6077 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6078 %} 6079 ins_pipe( pipe_slow ); 6080 %} 6081 6082 // Doubles vector sub 6083 instruct vsubD(vec dst, vec src) %{ 6084 predicate(UseAVX == 0); 6085 match(Set dst (SubVD dst src)); 6086 format %{ "subpd $dst,$src\t! sub packedD" %} 6087 ins_encode %{ 6088 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6089 %} 6090 ins_pipe( pipe_slow ); 6091 %} 6092 6093 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6094 predicate(UseAVX > 0); 6095 match(Set dst (SubVD src1 src2)); 6096 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6097 ins_encode %{ 6098 int vlen_enc = vector_length_encoding(this); 6099 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6100 %} 6101 ins_pipe( pipe_slow ); 6102 %} 6103 6104 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6105 predicate((UseAVX > 0) && 6106 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6107 match(Set dst (SubVD src (LoadVector mem))); 6108 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6109 ins_encode %{ 6110 int vlen_enc = vector_length_encoding(this); 6111 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6112 %} 6113 ins_pipe( pipe_slow ); 6114 %} 6115 6116 // --------------------------------- MUL -------------------------------------- 6117 6118 // Byte vector mul 6119 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6120 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6121 match(Set dst (MulVB src1 src2)); 6122 effect(TEMP dst, TEMP xtmp); 6123 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6124 ins_encode %{ 6125 assert(UseSSE > 3, "required"); 6126 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6127 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6128 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6129 __ psllw($dst$$XMMRegister, 8); 6130 __ psrlw($dst$$XMMRegister, 8); 6131 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6132 %} 6133 ins_pipe( pipe_slow ); 6134 %} 6135 6136 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6137 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6138 match(Set dst (MulVB src1 src2)); 6139 effect(TEMP dst, TEMP xtmp); 6140 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6141 ins_encode %{ 6142 assert(UseSSE > 3, "required"); 6143 // Odd-index elements 6144 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6145 __ psrlw($dst$$XMMRegister, 8); 6146 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6147 __ psrlw($xtmp$$XMMRegister, 8); 6148 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6149 __ psllw($dst$$XMMRegister, 8); 6150 // Even-index elements 6151 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6152 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6153 __ psllw($xtmp$$XMMRegister, 8); 6154 __ psrlw($xtmp$$XMMRegister, 8); 6155 // Combine 6156 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6157 %} 6158 ins_pipe( pipe_slow ); 6159 %} 6160 6161 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6162 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6163 match(Set dst (MulVB src1 src2)); 6164 effect(TEMP xtmp1, TEMP xtmp2); 6165 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6166 ins_encode %{ 6167 int vlen_enc = vector_length_encoding(this); 6168 // Odd-index elements 6169 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6170 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6171 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6172 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6173 // Even-index elements 6174 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6175 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6176 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6177 // Combine 6178 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6179 %} 6180 ins_pipe( pipe_slow ); 6181 %} 6182 6183 // Shorts/Chars vector mul 6184 instruct vmulS(vec dst, vec src) %{ 6185 predicate(UseAVX == 0); 6186 match(Set dst (MulVS dst src)); 6187 format %{ "pmullw $dst,$src\t! mul packedS" %} 6188 ins_encode %{ 6189 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6190 %} 6191 ins_pipe( pipe_slow ); 6192 %} 6193 6194 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6195 predicate(UseAVX > 0); 6196 match(Set dst (MulVS src1 src2)); 6197 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6198 ins_encode %{ 6199 int vlen_enc = vector_length_encoding(this); 6200 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6201 %} 6202 ins_pipe( pipe_slow ); 6203 %} 6204 6205 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6206 predicate((UseAVX > 0) && 6207 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6208 match(Set dst (MulVS src (LoadVector mem))); 6209 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6210 ins_encode %{ 6211 int vlen_enc = vector_length_encoding(this); 6212 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6213 %} 6214 ins_pipe( pipe_slow ); 6215 %} 6216 6217 // Integers vector mul 6218 instruct vmulI(vec dst, vec src) %{ 6219 predicate(UseAVX == 0); 6220 match(Set dst (MulVI dst src)); 6221 format %{ "pmulld $dst,$src\t! mul packedI" %} 6222 ins_encode %{ 6223 assert(UseSSE > 3, "required"); 6224 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6225 %} 6226 ins_pipe( pipe_slow ); 6227 %} 6228 6229 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6230 predicate(UseAVX > 0); 6231 match(Set dst (MulVI src1 src2)); 6232 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6233 ins_encode %{ 6234 int vlen_enc = vector_length_encoding(this); 6235 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6236 %} 6237 ins_pipe( pipe_slow ); 6238 %} 6239 6240 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6241 predicate((UseAVX > 0) && 6242 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6243 match(Set dst (MulVI src (LoadVector mem))); 6244 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6245 ins_encode %{ 6246 int vlen_enc = vector_length_encoding(this); 6247 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 // Longs vector mul 6253 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6254 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6255 VM_Version::supports_avx512dq()) || 6256 VM_Version::supports_avx512vldq()); 6257 match(Set dst (MulVL src1 src2)); 6258 ins_cost(500); 6259 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6260 ins_encode %{ 6261 assert(UseAVX > 2, "required"); 6262 int vlen_enc = vector_length_encoding(this); 6263 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6264 %} 6265 ins_pipe( pipe_slow ); 6266 %} 6267 6268 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6269 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6270 VM_Version::supports_avx512dq()) || 6271 (Matcher::vector_length_in_bytes(n) > 8 && 6272 VM_Version::supports_avx512vldq())); 6273 match(Set dst (MulVL src (LoadVector mem))); 6274 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6275 ins_cost(500); 6276 ins_encode %{ 6277 assert(UseAVX > 2, "required"); 6278 int vlen_enc = vector_length_encoding(this); 6279 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6280 %} 6281 ins_pipe( pipe_slow ); 6282 %} 6283 6284 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6285 predicate(UseAVX == 0); 6286 match(Set dst (MulVL src1 src2)); 6287 ins_cost(500); 6288 effect(TEMP dst, TEMP xtmp); 6289 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6290 ins_encode %{ 6291 assert(VM_Version::supports_sse4_1(), "required"); 6292 // Get the lo-hi products, only the lower 32 bits is in concerns 6293 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6294 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6295 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6296 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6297 __ psllq($dst$$XMMRegister, 32); 6298 // Get the lo-lo products 6299 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6300 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6301 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6302 %} 6303 ins_pipe( pipe_slow ); 6304 %} 6305 6306 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6307 predicate(UseAVX > 0 && 6308 ((Matcher::vector_length_in_bytes(n) == 64 && 6309 !VM_Version::supports_avx512dq()) || 6310 (Matcher::vector_length_in_bytes(n) < 64 && 6311 !VM_Version::supports_avx512vldq()))); 6312 match(Set dst (MulVL src1 src2)); 6313 effect(TEMP xtmp1, TEMP xtmp2); 6314 ins_cost(500); 6315 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6316 ins_encode %{ 6317 int vlen_enc = vector_length_encoding(this); 6318 // Get the lo-hi products, only the lower 32 bits is in concerns 6319 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6320 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6321 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6322 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6323 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6324 // Get the lo-lo products 6325 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6326 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6327 %} 6328 ins_pipe( pipe_slow ); 6329 %} 6330 6331 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{ 6332 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs()); 6333 match(Set dst (MulVL src1 src2)); 6334 ins_cost(100); 6335 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %} 6336 ins_encode %{ 6337 int vlen_enc = vector_length_encoding(this); 6338 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6339 %} 6340 ins_pipe( pipe_slow ); 6341 %} 6342 6343 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{ 6344 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs()); 6345 match(Set dst (MulVL src1 src2)); 6346 ins_cost(100); 6347 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %} 6348 ins_encode %{ 6349 int vlen_enc = vector_length_encoding(this); 6350 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6351 %} 6352 ins_pipe( pipe_slow ); 6353 %} 6354 6355 // Floats vector mul 6356 instruct vmulF(vec dst, vec src) %{ 6357 predicate(UseAVX == 0); 6358 match(Set dst (MulVF dst src)); 6359 format %{ "mulps $dst,$src\t! mul packedF" %} 6360 ins_encode %{ 6361 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6362 %} 6363 ins_pipe( pipe_slow ); 6364 %} 6365 6366 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6367 predicate(UseAVX > 0); 6368 match(Set dst (MulVF src1 src2)); 6369 format %{ "vmulps $dst,$src1,$src2\t! 
mul packedF" %} 6370 ins_encode %{ 6371 int vlen_enc = vector_length_encoding(this); 6372 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6378 predicate((UseAVX > 0) && 6379 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6380 match(Set dst (MulVF src (LoadVector mem))); 6381 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6382 ins_encode %{ 6383 int vlen_enc = vector_length_encoding(this); 6384 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6385 %} 6386 ins_pipe( pipe_slow ); 6387 %} 6388 6389 // Doubles vector mul 6390 instruct vmulD(vec dst, vec src) %{ 6391 predicate(UseAVX == 0); 6392 match(Set dst (MulVD dst src)); 6393 format %{ "mulpd $dst,$src\t! mul packedD" %} 6394 ins_encode %{ 6395 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6396 %} 6397 ins_pipe( pipe_slow ); 6398 %} 6399 6400 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6401 predicate(UseAVX > 0); 6402 match(Set dst (MulVD src1 src2)); 6403 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6404 ins_encode %{ 6405 int vlen_enc = vector_length_encoding(this); 6406 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6407 %} 6408 ins_pipe( pipe_slow ); 6409 %} 6410 6411 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6412 predicate((UseAVX > 0) && 6413 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6414 match(Set dst (MulVD src (LoadVector mem))); 6415 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6416 ins_encode %{ 6417 int vlen_enc = vector_length_encoding(this); 6418 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6419 %} 6420 ins_pipe( pipe_slow ); 6421 %} 6422 6423 // --------------------------------- DIV -------------------------------------- 6424 6425 // Floats vector div 6426 instruct vdivF(vec dst, vec src) %{ 6427 predicate(UseAVX == 0); 6428 match(Set dst (DivVF dst src)); 6429 format %{ "divps $dst,$src\t! div packedF" %} 6430 ins_encode %{ 6431 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6432 %} 6433 ins_pipe( pipe_slow ); 6434 %} 6435 6436 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6437 predicate(UseAVX > 0); 6438 match(Set dst (DivVF src1 src2)); 6439 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6440 ins_encode %{ 6441 int vlen_enc = vector_length_encoding(this); 6442 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6443 %} 6444 ins_pipe( pipe_slow ); 6445 %} 6446 6447 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6448 predicate((UseAVX > 0) && 6449 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6450 match(Set dst (DivVF src (LoadVector mem))); 6451 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6452 ins_encode %{ 6453 int vlen_enc = vector_length_encoding(this); 6454 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6455 %} 6456 ins_pipe( pipe_slow ); 6457 %} 6458 6459 // Doubles vector div 6460 instruct vdivD(vec dst, vec src) %{ 6461 predicate(UseAVX == 0); 6462 match(Set dst (DivVD dst src)); 6463 format %{ "divpd $dst,$src\t! div packedD" %} 6464 ins_encode %{ 6465 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6466 %} 6467 ins_pipe( pipe_slow ); 6468 %} 6469 6470 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6471 predicate(UseAVX > 0); 6472 match(Set dst (DivVD src1 src2)); 6473 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6474 ins_encode %{ 6475 int vlen_enc = vector_length_encoding(this); 6476 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6477 %} 6478 ins_pipe( pipe_slow ); 6479 %} 6480 6481 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6482 predicate((UseAVX > 0) && 6483 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6484 match(Set dst (DivVD src (LoadVector mem))); 6485 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6486 ins_encode %{ 6487 int vlen_enc = vector_length_encoding(this); 6488 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6489 %} 6490 ins_pipe( pipe_slow ); 6491 %} 6492 6493 // ------------------------------ MinMax --------------------------------------- 6494 6495 // Byte, Short, Int vector Min/Max 6496 instruct minmax_reg_sse(vec dst, vec src) %{ 6497 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6498 UseAVX == 0); 6499 match(Set dst (MinV dst src)); 6500 match(Set dst (MaxV dst src)); 6501 format %{ "vector_minmax $dst,$src\t! " %} 6502 ins_encode %{ 6503 assert(UseSSE >= 4, "required"); 6504 6505 int opcode = this->ideal_Opcode(); 6506 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6507 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6508 %} 6509 ins_pipe( pipe_slow ); 6510 %} 6511 6512 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6513 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6514 UseAVX > 0); 6515 match(Set dst (MinV src1 src2)); 6516 match(Set dst (MaxV src1 src2)); 6517 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6518 ins_encode %{ 6519 int opcode = this->ideal_Opcode(); 6520 int vlen_enc = vector_length_encoding(this); 6521 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6522 6523 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6524 %} 6525 ins_pipe( pipe_slow ); 6526 %} 6527 6528 // Long vector Min/Max 6529 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6530 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6531 UseAVX == 0); 6532 match(Set dst (MinV dst src)); 6533 match(Set dst (MaxV src dst)); 6534 effect(TEMP dst, TEMP tmp); 6535 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6536 ins_encode %{ 6537 assert(UseSSE >= 4, "required"); 6538 6539 int opcode = this->ideal_Opcode(); 6540 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6541 assert(elem_bt == T_LONG, "sanity"); 6542 6543 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6544 %} 6545 ins_pipe( pipe_slow ); 6546 %} 6547 6548 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6549 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6550 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6551 match(Set dst (MinV src1 src2)); 6552 match(Set dst (MaxV src1 src2)); 6553 effect(TEMP dst); 6554 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6555 ins_encode %{ 6556 int vlen_enc = vector_length_encoding(this); 6557 int opcode = this->ideal_Opcode(); 6558 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6559 assert(elem_bt == T_LONG, "sanity"); 6560 6561 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6562 %} 6563 ins_pipe( pipe_slow ); 6564 %} 6565 6566 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6567 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6568 Matcher::vector_element_basic_type(n) == T_LONG); 6569 match(Set dst (MinV src1 src2)); 6570 match(Set dst (MaxV src1 src2)); 6571 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6572 ins_encode %{ 6573 assert(UseAVX > 2, "required"); 6574 6575 int vlen_enc = vector_length_encoding(this); 6576 int opcode = this->ideal_Opcode(); 6577 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6578 assert(elem_bt == T_LONG, "sanity"); 6579 6580 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6581 %} 6582 ins_pipe( pipe_slow ); 6583 %} 6584 6585 // Float/Double vector Min/Max 6586 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6587 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6588 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6589 UseAVX > 0); 6590 match(Set dst (MinV a b)); 6591 match(Set dst (MaxV a b)); 6592 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6593 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6594 ins_encode %{ 6595 assert(UseAVX > 0, "required"); 6596 6597 int opcode = this->ideal_Opcode(); 6598 int vlen_enc = vector_length_encoding(this); 6599 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6600 6601 __ vminmax_fp(opcode, elem_bt, 6602 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6603 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6604 %} 6605 ins_pipe( pipe_slow ); 6606 %} 6607 6608 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6609 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6610 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6611 match(Set dst (MinV a b)); 6612 match(Set dst (MaxV a b)); 6613 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6614 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6615 ins_encode %{ 6616 assert(UseAVX > 2, "required"); 6617 6618 int opcode = this->ideal_Opcode(); 6619 int vlen_enc = vector_length_encoding(this); 6620 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6621 6622 __ evminmax_fp(opcode, elem_bt, 6623 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6624 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6625 %} 6626 ins_pipe( pipe_slow ); 6627 %} 6628 6629 // ------------------------------ Unsigned vector Min/Max ---------------------- 6630 6631 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6632 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6633 match(Set dst (UMinV a b)); 6634 match(Set dst (UMaxV a b)); 6635 format %{ "vector_uminmax $dst,$a,$b\t!" 
%}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t!
umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is 0x11100100 = 0xe4
// ---------------------------------------

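// Illustrative scalar sketch of the selection that vpternlog performs with
// imm8 0xE4 (exposition only, not part of the matcher; the helper name is
// made up): with A = dst bits, B = src bits and C = 0x7FFFFFFF, the truth
// table above is equivalent to (A & C) | (B & ~C), i.e. magnitude bits come
// from A and the sign bit comes from B:
//
//   uint32_t copysign_bits(uint32_t a, uint32_t b) {
//     const uint32_t c = 0x7FFFFFFF;   // vpternlog operand C
//     return (a & c) | (b & ~c);       // C=1 -> pick A, C=0 -> pick B
//   }
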
#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t!
sqrt packedD" %} 6876 ins_encode %{ 6877 assert(UseAVX > 0, "required"); 6878 int vlen_enc = vector_length_encoding(this); 6879 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6880 %} 6881 ins_pipe( pipe_slow ); 6882 %} 6883 6884 instruct vsqrtD_mem(vec dst, memory mem) %{ 6885 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6886 match(Set dst (SqrtVD (LoadVector mem))); 6887 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6888 ins_encode %{ 6889 assert(UseAVX > 0, "required"); 6890 int vlen_enc = vector_length_encoding(this); 6891 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6892 %} 6893 ins_pipe( pipe_slow ); 6894 %} 6895 6896 // ------------------------------ Shift --------------------------------------- 6897 6898 // Left and right shift count vectors are the same on x86 6899 // (only lowest bits of xmm reg are used for count). 6900 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6901 match(Set dst (LShiftCntV cnt)); 6902 match(Set dst (RShiftCntV cnt)); 6903 format %{ "movdl $dst,$cnt\t! load shift count" %} 6904 ins_encode %{ 6905 __ movdl($dst$$XMMRegister, $cnt$$Register); 6906 %} 6907 ins_pipe( pipe_slow ); 6908 %} 6909 6910 // Byte vector shift 6911 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6912 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6913 match(Set dst ( LShiftVB src shift)); 6914 match(Set dst ( RShiftVB src shift)); 6915 match(Set dst (URShiftVB src shift)); 6916 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6917 format %{"vector_byte_shift $dst,$src,$shift" %} 6918 ins_encode %{ 6919 assert(UseSSE > 3, "required"); 6920 int opcode = this->ideal_Opcode(); 6921 bool sign = (opcode != Op_URShiftVB); 6922 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6923 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6924 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6925 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6926 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6927 %} 6928 ins_pipe( pipe_slow ); 6929 %} 6930 6931 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6932 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6933 UseAVX <= 1); 6934 match(Set dst ( LShiftVB src shift)); 6935 match(Set dst ( RShiftVB src shift)); 6936 match(Set dst (URShiftVB src shift)); 6937 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6938 format %{"vector_byte_shift $dst,$src,$shift" %} 6939 ins_encode %{ 6940 assert(UseSSE > 3, "required"); 6941 int opcode = this->ideal_Opcode(); 6942 bool sign = (opcode != Op_URShiftVB); 6943 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6944 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6945 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6946 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6947 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6948 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6949 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6950 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6951 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6952 %} 6953 ins_pipe( pipe_slow ); 6954 %} 6955 6956 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6957 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6958 UseAVX > 1); 6959 match(Set dst ( LShiftVB src shift)); 6960 match(Set dst ( RShiftVB src shift)); 6961 match(Set 
dst (URShiftVB src shift)); 6962 effect(TEMP dst, TEMP tmp); 6963 format %{"vector_byte_shift $dst,$src,$shift" %} 6964 ins_encode %{ 6965 int opcode = this->ideal_Opcode(); 6966 bool sign = (opcode != Op_URShiftVB); 6967 int vlen_enc = Assembler::AVX_256bit; 6968 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6969 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6970 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6971 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6972 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6978 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6979 match(Set dst ( LShiftVB src shift)); 6980 match(Set dst ( RShiftVB src shift)); 6981 match(Set dst (URShiftVB src shift)); 6982 effect(TEMP dst, TEMP tmp); 6983 format %{"vector_byte_shift $dst,$src,$shift" %} 6984 ins_encode %{ 6985 assert(UseAVX > 1, "required"); 6986 int opcode = this->ideal_Opcode(); 6987 bool sign = (opcode != Op_URShiftVB); 6988 int vlen_enc = Assembler::AVX_256bit; 6989 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6990 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6991 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6992 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6993 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6994 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6995 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6996 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6997 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6998 %} 6999 ins_pipe( pipe_slow ); 7000 %} 7001 7002 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 7003 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 7004 match(Set dst ( LShiftVB src shift)); 7005 match(Set dst (RShiftVB src shift)); 7006 match(Set dst (URShiftVB src shift)); 7007 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 7008 format %{"vector_byte_shift $dst,$src,$shift" %} 7009 ins_encode %{ 7010 assert(UseAVX > 2, "required"); 7011 int opcode = this->ideal_Opcode(); 7012 bool sign = (opcode != Op_URShiftVB); 7013 int vlen_enc = Assembler::AVX_512bit; 7014 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 7015 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 7016 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7017 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7018 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7019 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 7020 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7021 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7022 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7023 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 7024 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// A shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java converts the short value into an int with sign
// extension before shifting (e.g. (short)0x8000 >>> 1 is computed on the int
// 0xFFFF8000 and leaves 0xC000 in the low 16 bits, not 0x4000). Char vectors
// are fine since chars are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 7099 ins_encode %{ 7100 int opcode = this->ideal_Opcode(); 7101 if (UseAVX > 0) { 7102 int vector_len = vector_length_encoding(this); 7103 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7104 } else { 7105 int vlen = Matcher::vector_length(this); 7106 if (vlen == 2) { 7107 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7108 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7109 } else { 7110 assert(vlen == 4, "sanity"); 7111 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7112 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7113 } 7114 } 7115 %} 7116 ins_pipe( pipe_slow ); 7117 %} 7118 7119 // Longs vector shift 7120 instruct vshiftL(vec dst, vec src, vec shift) %{ 7121 predicate(!n->as_ShiftV()->is_var_shift()); 7122 match(Set dst ( LShiftVL src shift)); 7123 match(Set dst (URShiftVL src shift)); 7124 effect(TEMP dst, USE src, USE shift); 7125 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 7126 ins_encode %{ 7127 int opcode = this->ideal_Opcode(); 7128 if (UseAVX > 0) { 7129 int vlen_enc = vector_length_encoding(this); 7130 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7131 } else { 7132 assert(Matcher::vector_length(this) == 2, ""); 7133 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7134 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7135 } 7136 %} 7137 ins_pipe( pipe_slow ); 7138 %} 7139 7140 // Longs vector constant shift 7141 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7142 match(Set dst (LShiftVL src (LShiftCntV shift))); 7143 match(Set dst (URShiftVL src (RShiftCntV shift))); 7144 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7145 ins_encode %{ 7146 int opcode = this->ideal_Opcode(); 7147 if (UseAVX > 0) { 7148 int vector_len = vector_length_encoding(this); 7149 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7150 } else { 7151 assert(Matcher::vector_length(this) == 2, ""); 7152 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7153 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7154 } 7155 %} 7156 ins_pipe( pipe_slow ); 7157 %} 7158 7159 // -------------------ArithmeticRightShift ----------------------------------- 7160 // Long vector arithmetic right shift 7161 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7162 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7163 match(Set dst (RShiftVL src shift)); 7164 effect(TEMP dst, TEMP tmp); 7165 format %{ "vshiftq $dst,$src,$shift" %} 7166 ins_encode %{ 7167 uint vlen = Matcher::vector_length(this); 7168 if (vlen == 2) { 7169 assert(UseSSE >= 2, "required"); 7170 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7171 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7172 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7173 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7174 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7175 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7176 } else { 7177 assert(vlen == 4, "sanity"); 7178 assert(UseAVX > 1, "required"); 7179 int vlen_enc = Assembler::AVX_256bit; 7180 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7181 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7182 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7183 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7184 __ 
vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7185 } 7186 %} 7187 ins_pipe( pipe_slow ); 7188 %} 7189 7190 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7191 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7192 match(Set dst (RShiftVL src shift)); 7193 format %{ "vshiftq $dst,$src,$shift" %} 7194 ins_encode %{ 7195 int vlen_enc = vector_length_encoding(this); 7196 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7197 %} 7198 ins_pipe( pipe_slow ); 7199 %} 7200 7201 // ------------------- Variable Shift ----------------------------- 7202 // Byte variable shift 7203 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7204 predicate(Matcher::vector_length(n) <= 8 && 7205 n->as_ShiftV()->is_var_shift() && 7206 !VM_Version::supports_avx512bw()); 7207 match(Set dst ( LShiftVB src shift)); 7208 match(Set dst ( RShiftVB src shift)); 7209 match(Set dst (URShiftVB src shift)); 7210 effect(TEMP dst, TEMP vtmp); 7211 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7212 ins_encode %{ 7213 assert(UseAVX >= 2, "required"); 7214 7215 int opcode = this->ideal_Opcode(); 7216 int vlen_enc = Assembler::AVX_128bit; 7217 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7218 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7219 %} 7220 ins_pipe( pipe_slow ); 7221 %} 7222 7223 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7224 predicate(Matcher::vector_length(n) == 16 && 7225 n->as_ShiftV()->is_var_shift() && 7226 !VM_Version::supports_avx512bw()); 7227 match(Set dst ( LShiftVB src shift)); 7228 match(Set dst ( RShiftVB src shift)); 7229 match(Set dst (URShiftVB src shift)); 7230 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7231 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7232 ins_encode %{ 7233 assert(UseAVX >= 2, "required"); 7234 7235 int opcode = this->ideal_Opcode(); 7236 int vlen_enc = Assembler::AVX_128bit; 7237 // Shift lower half and get word result in dst 7238 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7239 7240 // Shift upper half and get word result in vtmp1 7241 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7242 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7243 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7244 7245 // Merge and down convert the two word results to byte in dst 7246 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7247 %} 7248 ins_pipe( pipe_slow ); 7249 %} 7250 7251 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7252 predicate(Matcher::vector_length(n) == 32 && 7253 n->as_ShiftV()->is_var_shift() && 7254 !VM_Version::supports_avx512bw()); 7255 match(Set dst ( LShiftVB src shift)); 7256 match(Set dst ( RShiftVB src shift)); 7257 match(Set dst (URShiftVB src shift)); 7258 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7259 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7260 ins_encode %{ 7261 assert(UseAVX >= 2, "required"); 7262 7263 int opcode = this->ideal_Opcode(); 7264 int vlen_enc = Assembler::AVX_128bit; 7265 // Process lower 128 bits and get result in dst 7266 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7267 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7268 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7269 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7270 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7271 7272 // Process higher 128 bits and get result in vtmp3 7273 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7274 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7275 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7276 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7277 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7278 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7279 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7280 7281 // Merge the two results in dst 7282 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7283 %} 7284 ins_pipe( pipe_slow ); 7285 %} 7286 7287 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7288 predicate(Matcher::vector_length(n) <= 32 && 7289 n->as_ShiftV()->is_var_shift() && 7290 VM_Version::supports_avx512bw()); 7291 match(Set dst ( LShiftVB src shift)); 7292 match(Set dst ( RShiftVB src shift)); 7293 match(Set dst (URShiftVB src shift)); 7294 effect(TEMP dst, TEMP vtmp); 7295 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Short variable shift
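// Without AVX-512BW there is no variable word shift either (vpsllvw/vpsrlvw/vpsravw),
// so shorts are sign- or zero-extended to ints (vextendwd / vpmovzxwd), shifted with the
// variable dword shift via varshiftd(), masked back to their low 16 bits and re-packed
// with vpackusdw. The 16-element form does this per 128-bit half and merges the halves
// with vpermq.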
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift arithmetic
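// AVX2 provides variable logical shifts for longs (vpsllvq/vpsrlvq) but no variable
// arithmetic right shift, so the UseAVX == 2 pattern below emulates it through varshiftq()
// with an extra TEMP vector. AVX-512 provides vpsravq, so the _evex form needs no temporary.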
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t!
xor vectors" %} 7550 ins_encode %{ 7551 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7552 %} 7553 ins_pipe( pipe_slow ); 7554 %} 7555 7556 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7557 predicate(UseAVX > 0); 7558 match(Set dst (XorV src1 src2)); 7559 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7560 ins_encode %{ 7561 int vlen_enc = vector_length_encoding(this); 7562 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7563 %} 7564 ins_pipe( pipe_slow ); 7565 %} 7566 7567 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7568 predicate((UseAVX > 0) && 7569 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7570 match(Set dst (XorV src (LoadVector mem))); 7571 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7572 ins_encode %{ 7573 int vlen_enc = vector_length_encoding(this); 7574 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7575 %} 7576 ins_pipe( pipe_slow ); 7577 %} 7578 7579 // --------------------------------- VectorCast -------------------------------------- 7580 7581 instruct vcastBtoX(vec dst, vec src) %{ 7582 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7583 match(Set dst (VectorCastB2X src)); 7584 format %{ "vector_cast_b2x $dst,$src\t!" %} 7585 ins_encode %{ 7586 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7587 int vlen_enc = vector_length_encoding(this); 7588 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7589 %} 7590 ins_pipe( pipe_slow ); 7591 %} 7592 7593 instruct vcastBtoD(legVec dst, legVec src) %{ 7594 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7595 match(Set dst (VectorCastB2X src)); 7596 format %{ "vector_cast_b2x $dst,$src\t!" %} 7597 ins_encode %{ 7598 int vlen_enc = vector_length_encoding(this); 7599 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7600 %} 7601 ins_pipe( pipe_slow ); 7602 %} 7603 7604 instruct castStoX(vec dst, vec src) %{ 7605 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7606 Matcher::vector_length(n->in(1)) <= 8 && // src 7607 Matcher::vector_element_basic_type(n) == T_BYTE); 7608 match(Set dst (VectorCastS2X src)); 7609 format %{ "vector_cast_s2x $dst,$src" %} 7610 ins_encode %{ 7611 assert(UseAVX > 0, "required"); 7612 7613 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7614 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7615 %} 7616 ins_pipe( pipe_slow ); 7617 %} 7618 7619 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7620 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7621 Matcher::vector_length(n->in(1)) == 16 && // src 7622 Matcher::vector_element_basic_type(n) == T_BYTE); 7623 effect(TEMP dst, TEMP vtmp); 7624 match(Set dst (VectorCastS2X src)); 7625 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7626 ins_encode %{ 7627 assert(UseAVX > 0, "required"); 7628 7629 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7630 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7631 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7632 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7633 %} 7634 ins_pipe( pipe_slow ); 7635 %} 7636 7637 instruct vcastStoX_evex(vec dst, vec src) %{ 7638 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7639 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7640 match(Set dst (VectorCastS2X src)); 7641 format %{ "vector_cast_s2x $dst,$src\t!" %} 7642 ins_encode %{ 7643 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7644 int src_vlen_enc = vector_length_encoding(this, $src); 7645 int vlen_enc = vector_length_encoding(this); 7646 switch (to_elem_bt) { 7647 case T_BYTE: 7648 if (!VM_Version::supports_avx512vl()) { 7649 vlen_enc = Assembler::AVX_512bit; 7650 } 7651 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7652 break; 7653 case T_INT: 7654 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7655 break; 7656 case T_FLOAT: 7657 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7658 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7659 break; 7660 case T_LONG: 7661 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7662 break; 7663 case T_DOUBLE: { 7664 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7665 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7666 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7667 break; 7668 } 7669 default: 7670 ShouldNotReachHere(); 7671 } 7672 %} 7673 ins_pipe( pipe_slow ); 7674 %} 7675 7676 instruct castItoX(vec dst, vec src) %{ 7677 predicate(UseAVX <= 2 && 7678 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7679 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7680 match(Set dst (VectorCastI2X src)); 7681 format %{ "vector_cast_i2x $dst,$src" %} 7682 ins_encode %{ 7683 assert(UseAVX > 0, "required"); 7684 7685 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7686 int vlen_enc = vector_length_encoding(this, $src); 7687 7688 if (to_elem_bt == T_BYTE) { 7689 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7690 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7691 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7692 } else { 7693 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7694 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7695 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7696 } 7697 %} 7698 ins_pipe( pipe_slow ); 7699 %} 7700 7701 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7702 predicate(UseAVX <= 2 && 7703 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7704 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7705 match(Set dst (VectorCastI2X src)); 7706 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7707 effect(TEMP dst, TEMP vtmp); 7708 ins_encode %{ 7709 assert(UseAVX > 0, "required"); 7710 7711 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7712 int vlen_enc = vector_length_encoding(this, $src); 7713 7714 if (to_elem_bt == T_BYTE) { 7715 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7716 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7717 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7718 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7719 } else { 7720 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7721 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7722 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7723 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7724 } 7725 %} 7726 ins_pipe( pipe_slow ); 7727 %} 7728 7729 instruct vcastItoX_evex(vec dst, vec src) %{ 7730 predicate(UseAVX > 2 || 7731 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7732 match(Set dst (VectorCastI2X src)); 7733 format %{ "vector_cast_i2x $dst,$src\t!" %} 7734 ins_encode %{ 7735 assert(UseAVX > 0, "required"); 7736 7737 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7738 int src_vlen_enc = vector_length_encoding(this, $src); 7739 int dst_vlen_enc = vector_length_encoding(this); 7740 switch (dst_elem_bt) { 7741 case T_BYTE: 7742 if (!VM_Version::supports_avx512vl()) { 7743 src_vlen_enc = Assembler::AVX_512bit; 7744 } 7745 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7746 break; 7747 case T_SHORT: 7748 if (!VM_Version::supports_avx512vl()) { 7749 src_vlen_enc = Assembler::AVX_512bit; 7750 } 7751 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7752 break; 7753 case T_FLOAT: 7754 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7755 break; 7756 case T_LONG: 7757 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7758 break; 7759 case T_DOUBLE: 7760 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7761 break; 7762 default: 7763 ShouldNotReachHere(); 7764 } 7765 %} 7766 ins_pipe( pipe_slow ); 7767 %} 7768 7769 instruct vcastLtoBS(vec dst, vec src) %{ 7770 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7771 UseAVX <= 2); 7772 match(Set dst (VectorCastL2X src)); 7773 format %{ "vector_cast_l2x $dst,$src" %} 7774 ins_encode %{ 7775 assert(UseAVX > 0, "required"); 7776 7777 int vlen = Matcher::vector_length_in_bytes(this, $src); 7778 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7779 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7780 : ExternalAddress(vector_int_to_short_mask()); 7781 if (vlen <= 16) { 7782 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7783 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7784 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7785 } else { 7786 assert(vlen <= 32, "required"); 7787 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7788 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7789 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7790 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7791 } 7792 if (to_elem_bt == T_BYTE) { 7793 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7794 } 7795 %} 7796 ins_pipe( pipe_slow ); 7797 %} 7798 7799 instruct vcastLtoX_evex(vec dst, vec src) %{ 7800 predicate(UseAVX > 2 || 7801 (Matcher::vector_element_basic_type(n) == T_INT || 7802 Matcher::vector_element_basic_type(n) == T_FLOAT || 7803 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7804 match(Set dst (VectorCastL2X src)); 7805 format %{ "vector_cast_l2x $dst,$src\t!" %} 7806 ins_encode %{ 7807 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7808 int vlen = Matcher::vector_length_in_bytes(this, $src); 7809 int vlen_enc = vector_length_encoding(this, $src); 7810 switch (to_elem_bt) { 7811 case T_BYTE: 7812 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7813 vlen_enc = Assembler::AVX_512bit; 7814 } 7815 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7816 break; 7817 case T_SHORT: 7818 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7819 vlen_enc = Assembler::AVX_512bit; 7820 } 7821 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7822 break; 7823 case T_INT: 7824 if (vlen == 8) { 7825 if ($dst$$XMMRegister != $src$$XMMRegister) { 7826 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7827 } 7828 } else if (vlen == 16) { 7829 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7830 } else if (vlen == 32) { 7831 if (UseAVX > 2) { 7832 if (!VM_Version::supports_avx512vl()) { 7833 vlen_enc = Assembler::AVX_512bit; 7834 } 7835 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7836 } else { 7837 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7838 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7839 } 7840 } else { // vlen == 64 7841 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7842 } 7843 break; 7844 case T_FLOAT: 7845 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7846 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7847 break; 7848 case T_DOUBLE: 7849 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7850 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7851 break; 7852 7853 default: assert(false, "%s", type2name(to_elem_bt)); 7854 } 7855 %} 7856 ins_pipe( pipe_slow ); 7857 %} 7858 7859 instruct vcastFtoD_reg(vec dst, vec src) %{ 7860 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7861 match(Set dst (VectorCastF2X src)); 7862 format %{ "vector_cast_f2d $dst,$src\t!" 
  %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading addresses
    // wider than 32 bits for register-indirect addressing, because stub constants live in
    // the code cache and ReservedCodeCacheSize is currently capped at 2G. Targets are free
    // to raise that limit, but a code cache larger than 2G is unrealistic in practice; on
    // the other hand, keeping the cap saves a temporary register allocation, which in the
    // limiting case can prevent spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!"
%} 7921 ins_encode %{ 7922 int vlen_enc = vector_length_encoding(this, $src); 7923 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7924 %} 7925 ins_pipe( pipe_slow ); 7926 %} 7927 7928 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7929 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7930 is_integral_type(Matcher::vector_element_basic_type(n))); 7931 match(Set dst (VectorCastD2X src)); 7932 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7933 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7934 ins_encode %{ 7935 int vlen_enc = vector_length_encoding(this, $src); 7936 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7937 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7938 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7939 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7940 %} 7941 ins_pipe( pipe_slow ); 7942 %} 7943 7944 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7945 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7946 is_integral_type(Matcher::vector_element_basic_type(n))); 7947 match(Set dst (VectorCastD2X src)); 7948 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7949 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7950 ins_encode %{ 7951 int vlen_enc = vector_length_encoding(this, $src); 7952 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7953 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7954 ExternalAddress(vector_float_signflip()); 7955 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7956 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7957 %} 7958 ins_pipe( pipe_slow ); 7959 %} 7960 7961 instruct vucast(vec dst, vec src) %{ 7962 match(Set dst (VectorUCastB2X src)); 7963 match(Set dst (VectorUCastS2X src)); 7964 match(Set dst (VectorUCastI2X src)); 7965 format %{ "vector_ucast $dst,$src\t!" %} 7966 ins_encode %{ 7967 assert(UseAVX > 0, "required"); 7968 7969 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7970 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7971 int vlen_enc = vector_length_encoding(this); 7972 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7973 %} 7974 ins_pipe( pipe_slow ); 7975 %} 7976 7977 #ifdef _LP64 7978 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7979 predicate(!VM_Version::supports_avx512vl() && 7980 Matcher::vector_length_in_bytes(n) < 64 && 7981 Matcher::vector_element_basic_type(n) == T_INT); 7982 match(Set dst (RoundVF src)); 7983 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7984 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7985 ins_encode %{ 7986 int vlen_enc = vector_length_encoding(this); 7987 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7988 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7989 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7990 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7991 %} 7992 ins_pipe( pipe_slow ); 7993 %} 7994 7995 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7996 predicate((VM_Version::supports_avx512vl() || 7997 Matcher::vector_length_in_bytes(n) == 64) && 7998 Matcher::vector_element_basic_type(n) == T_INT); 7999 match(Set dst (RoundVF src)); 8000 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 8001 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 8002 ins_encode %{ 8003 int vlen_enc = vector_length_encoding(this); 8004 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 8005 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 8006 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 8007 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 8008 %} 8009 ins_pipe( pipe_slow ); 8010 %} 8011 8012 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8013 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 8014 match(Set dst (RoundVD src)); 8015 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 8016 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 8017 ins_encode %{ 8018 int vlen_enc = vector_length_encoding(this); 8019 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 8020 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 8021 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 8022 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 8023 %} 8024 ins_pipe( pipe_slow ); 8025 %} 8026 8027 #endif // _LP64 8028 8029 // --------------------------------- VectorMaskCmp -------------------------------------- 8030 8031 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8032 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8033 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 8034 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8035 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8036 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8037 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 8038 ins_encode %{ 8039 int vlen_enc = vector_length_encoding(this, $src1); 8040 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8041 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8042 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8043 } else { 8044 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8045 } 8046 %} 8047 ins_pipe( pipe_slow ); 8048 %} 8049 8050 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8051 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 8052 n->bottom_type()->isa_vectmask() == nullptr && 8053 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8054 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8055 effect(TEMP ktmp); 8056 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8057 ins_encode %{ 8058 int vlen_enc = Assembler::AVX_512bit; 8059 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8060 KRegister mask = k0; // The comparison itself is not being masked. 8061 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8062 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8063 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8064 } else { 8065 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8066 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8067 } 8068 %} 8069 ins_pipe( pipe_slow ); 8070 %} 8071 8072 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 8073 predicate(n->bottom_type()->isa_vectmask() && 8074 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8075 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8076 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 8077 ins_encode %{ 8078 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8079 int vlen_enc = vector_length_encoding(this, $src1); 8080 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8081 KRegister mask = k0; // The comparison itself is not being masked. 8082 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8083 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8084 } else { 8085 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8086 } 8087 %} 8088 ins_pipe( pipe_slow ); 8089 %} 8090 8091 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8092 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8093 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8094 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8095 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8096 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8097 (n->in(2)->get_int() == BoolTest::eq || 8098 n->in(2)->get_int() == BoolTest::lt || 8099 n->in(2)->get_int() == BoolTest::gt)); // cond 8100 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8101 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
  %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
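
// AVX/AVX2 only provide signed packed integer compares, so for unsigned predicates both
// operands are first XORed with a broadcast of the per-element sign bit
// (high_bit_set(elem_bt)). This maps unsigned ordering onto signed ordering, after which
// the ordinary signed vpcmpCCW sequence can be used.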
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!"
%} 8199 ins_encode %{ 8200 assert(UseAVX > 2, "required"); 8201 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8202 8203 int vlen_enc = vector_length_encoding(this, $src1); 8204 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8205 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8206 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8207 8208 // Comparison i 8209 switch (src1_elem_bt) { 8210 case T_BYTE: { 8211 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8212 break; 8213 } 8214 case T_SHORT: { 8215 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8216 break; 8217 } 8218 case T_INT: { 8219 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8220 break; 8221 } 8222 case T_LONG: { 8223 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8224 break; 8225 } 8226 default: assert(false, "%s", type2name(src1_elem_bt)); 8227 } 8228 %} 8229 ins_pipe( pipe_slow ); 8230 %} 8231 8232 // Extract 8233 8234 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8235 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8236 match(Set dst (ExtractI src idx)); 8237 match(Set dst (ExtractS src idx)); 8238 #ifdef _LP64 8239 match(Set dst (ExtractB src idx)); 8240 #endif 8241 format %{ "extractI $dst,$src,$idx\t!" %} 8242 ins_encode %{ 8243 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8244 8245 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8246 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8247 %} 8248 ins_pipe( pipe_slow ); 8249 %} 8250 8251 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8252 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8253 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8254 match(Set dst (ExtractI src idx)); 8255 match(Set dst (ExtractS src idx)); 8256 #ifdef _LP64 8257 match(Set dst (ExtractB src idx)); 8258 #endif 8259 effect(TEMP vtmp); 8260 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8261 ins_encode %{ 8262 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8263 8264 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8265 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8266 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8267 %} 8268 ins_pipe( pipe_slow ); 8269 %} 8270 8271 #ifdef _LP64 8272 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8273 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8274 match(Set dst (ExtractL src idx)); 8275 format %{ "extractL $dst,$src,$idx\t!" %} 8276 ins_encode %{ 8277 assert(UseSSE >= 4, "required"); 8278 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8279 8280 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8281 %} 8282 ins_pipe( pipe_slow ); 8283 %} 8284 8285 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8286 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8287 Matcher::vector_length(n->in(1)) == 8); // src 8288 match(Set dst (ExtractL src idx)); 8289 effect(TEMP vtmp); 8290 format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8291 ins_encode %{ 8292 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8293 8294 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8295 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8296 %} 8297 ins_pipe( pipe_slow ); 8298 %} 8299 #endif 8300 8301 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8302 predicate(Matcher::vector_length(n->in(1)) <= 4); 8303 match(Set dst (ExtractF src idx)); 8304 effect(TEMP dst, TEMP vtmp); 8305 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8306 ins_encode %{ 8307 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8308 8309 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8310 %} 8311 ins_pipe( pipe_slow ); 8312 %} 8313 8314 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8315 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8316 Matcher::vector_length(n->in(1)/*src*/) == 16); 8317 match(Set dst (ExtractF src idx)); 8318 effect(TEMP vtmp); 8319 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8320 ins_encode %{ 8321 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8322 8323 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8324 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8330 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8331 match(Set dst (ExtractD src idx)); 8332 format %{ "extractD $dst,$src,$idx\t!" %} 8333 ins_encode %{ 8334 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8335 8336 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8337 %} 8338 ins_pipe( pipe_slow ); 8339 %} 8340 8341 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8342 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8343 Matcher::vector_length(n->in(1)) == 8); // src 8344 match(Set dst (ExtractD src idx)); 8345 effect(TEMP vtmp); 8346 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8347 ins_encode %{ 8348 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8349 8350 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8351 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8352 %} 8353 ins_pipe( pipe_slow ); 8354 %} 8355 8356 // --------------------------------- Vector Blend -------------------------------------- 8357 8358 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8359 predicate(UseAVX == 0); 8360 match(Set dst (VectorBlend (Binary dst src) mask)); 8361 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8362 effect(TEMP tmp); 8363 ins_encode %{ 8364 assert(UseSSE >= 4, "required"); 8365 8366 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8367 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8368 } 8369 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8370 %} 8371 ins_pipe( pipe_slow ); 8372 %} 8373 8374 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8375 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8376 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8377 Matcher::vector_length_in_bytes(n) <= 32 && 8378 is_integral_type(Matcher::vector_element_basic_type(n))); 8379 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8380 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8381 ins_encode %{ 8382 int vlen_enc = vector_length_encoding(this); 8383 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8384 %} 8385 ins_pipe( pipe_slow ); 8386 %} 8387 8388 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8389 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8390 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8391 Matcher::vector_length_in_bytes(n) <= 32 && 8392 !is_integral_type(Matcher::vector_element_basic_type(n))); 8393 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8394 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8395 ins_encode %{ 8396 int vlen_enc = vector_length_encoding(this); 8397 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8398 %} 8399 ins_pipe( pipe_slow ); 8400 %} 8401 8402 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8403 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8404 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8405 Matcher::vector_length_in_bytes(n) <= 32); 8406 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8407 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8408 effect(TEMP vtmp, TEMP dst); 8409 ins_encode %{ 8410 int vlen_enc = vector_length_encoding(this); 8411 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8412 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8413 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8414 %} 8415 ins_pipe( pipe_slow ); 8416 %} 8417 8418 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8419 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8420 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8421 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8422 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8423 effect(TEMP ktmp); 8424 ins_encode %{ 8425 int vlen_enc = Assembler::AVX_512bit; 8426 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8427 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8428 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8429 %} 8430 ins_pipe( pipe_slow ); 8431 %} 8432 8433 8434 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8435 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8436 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8437 VM_Version::supports_avx512bw())); 8438 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8439 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8440 ins_encode %{ 8441 int vlen_enc = vector_length_encoding(this); 8442 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8443 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8444 %} 8445 ins_pipe( pipe_slow ); 8446 %} 8447 8448 // --------------------------------- ABS -------------------------------------- 8449 // a = |a| 8450 instruct vabsB_reg(vec dst, vec src) %{ 8451 match(Set dst (AbsVB src)); 8452 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8453 ins_encode %{ 8454 uint vlen = Matcher::vector_length(this); 8455 if (vlen <= 16) { 8456 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8457 } else { 8458 int vlen_enc = vector_length_encoding(this); 8459 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8460 } 8461 %} 8462 ins_pipe( pipe_slow ); 8463 %} 8464 8465 instruct vabsS_reg(vec dst, vec src) %{ 8466 match(Set dst (AbsVS src)); 8467 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8468 ins_encode %{ 8469 uint vlen = Matcher::vector_length(this); 8470 if (vlen <= 8) { 8471 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8472 } else { 8473 int vlen_enc = vector_length_encoding(this); 8474 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8475 } 8476 %} 8477 ins_pipe( pipe_slow ); 8478 %} 8479 8480 instruct vabsI_reg(vec dst, vec src) %{ 8481 match(Set dst (AbsVI src)); 8482 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8483 ins_encode %{ 8484 uint vlen = Matcher::vector_length(this); 8485 if (vlen <= 4) { 8486 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8487 } else { 8488 int vlen_enc = vector_length_encoding(this); 8489 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8490 } 8491 %} 8492 ins_pipe( pipe_slow ); 8493 %} 8494 8495 instruct vabsL_reg(vec dst, vec src) %{ 8496 match(Set dst (AbsVL src)); 8497 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8498 ins_encode %{ 8499 assert(UseAVX > 2, "required"); 8500 int vlen_enc = vector_length_encoding(this); 8501 if (!VM_Version::supports_avx512vl()) { 8502 vlen_enc = Assembler::AVX_512bit; 8503 } 8504 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8505 %} 8506 ins_pipe( pipe_slow ); 8507 %} 8508 8509 // --------------------------------- ABSNEG -------------------------------------- 8510 8511 instruct vabsnegF(vec dst, vec src) %{ 8512 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8513 match(Set dst (AbsVF src)); 8514 match(Set dst (NegVF src)); 8515 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8516 ins_cost(150); 8517 ins_encode %{ 8518 int opcode = 
this->ideal_Opcode(); 8519 int vlen = Matcher::vector_length(this); 8520 if (vlen == 2) { 8521 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8522 } else { 8523 assert(vlen == 8 || vlen == 16, "required"); 8524 int vlen_enc = vector_length_encoding(this); 8525 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8526 } 8527 %} 8528 ins_pipe( pipe_slow ); 8529 %} 8530 8531 instruct vabsneg4F(vec dst) %{ 8532 predicate(Matcher::vector_length(n) == 4); 8533 match(Set dst (AbsVF dst)); 8534 match(Set dst (NegVF dst)); 8535 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8536 ins_cost(150); 8537 ins_encode %{ 8538 int opcode = this->ideal_Opcode(); 8539 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8540 %} 8541 ins_pipe( pipe_slow ); 8542 %} 8543 8544 instruct vabsnegD(vec dst, vec src) %{ 8545 match(Set dst (AbsVD src)); 8546 match(Set dst (NegVD src)); 8547 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8548 ins_encode %{ 8549 int opcode = this->ideal_Opcode(); 8550 uint vlen = Matcher::vector_length(this); 8551 if (vlen == 2) { 8552 assert(UseSSE >= 2, "required"); 8553 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8554 } else { 8555 int vlen_enc = vector_length_encoding(this); 8556 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8557 } 8558 %} 8559 ins_pipe( pipe_slow ); 8560 %} 8561 8562 //------------------------------------- VectorTest -------------------------------------------- 8563 8564 #ifdef _LP64 8565 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8566 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8567 match(Set cr (VectorTest src1 src2)); 8568 effect(TEMP vtmp); 8569 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8570 ins_encode %{ 8571 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8572 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8573 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8574 %} 8575 ins_pipe( pipe_slow ); 8576 %} 8577 8578 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8579 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8580 match(Set cr (VectorTest src1 src2)); 8581 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8582 ins_encode %{ 8583 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8584 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8585 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8591 predicate((Matcher::vector_length(n->in(1)) < 8 || 8592 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8593 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8594 match(Set cr (VectorTest src1 src2)); 8595 effect(TEMP tmp); 8596 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8597 ins_encode %{ 8598 uint masklen = Matcher::vector_length(this, $src1); 8599 __ kmovwl($tmp$$Register, $src1$$KRegister); 8600 __ andl($tmp$$Register, (1 << masklen) - 1); 8601 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8607 predicate((Matcher::vector_length(n->in(1)) < 8 || 8608 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8609 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8610 match(Set cr (VectorTest src1 src2)); 8611 effect(TEMP tmp); 8612 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8613 ins_encode %{ 8614 uint masklen = Matcher::vector_length(this, $src1); 8615 __ kmovwl($tmp$$Register, $src1$$KRegister); 8616 __ andl($tmp$$Register, (1 << masklen) - 1); 8617 %} 8618 ins_pipe( pipe_slow ); 8619 %} 8620 8621 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8622 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8623 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8624 match(Set cr (VectorTest src1 src2)); 8625 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8626 ins_encode %{ 8627 uint masklen = Matcher::vector_length(this, $src1); 8628 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8629 %} 8630 ins_pipe( pipe_slow ); 8631 %} 8632 #endif 8633 8634 //------------------------------------- LoadMask -------------------------------------------- 8635 8636 instruct loadMask(legVec dst, legVec src) %{ 8637 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8638 match(Set dst (VectorLoadMask src)); 8639 effect(TEMP dst); 8640 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8641 ins_encode %{ 8642 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8643 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8644 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8645 %} 8646 ins_pipe( pipe_slow ); 8647 %} 8648 8649 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8650 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8651 match(Set dst (VectorLoadMask src)); 8652 effect(TEMP xtmp); 8653 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8654 ins_encode %{ 8655 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8656 true, Assembler::AVX_512bit); 8657 %} 8658 ins_pipe( pipe_slow ); 8659 %} 8660 8661 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8662 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8663 match(Set dst (VectorLoadMask src)); 8664 effect(TEMP xtmp); 8665 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8666 ins_encode %{ 8667 int vlen_enc = vector_length_encoding(in(1)); 8668 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8669 false, vlen_enc); 8670 %} 8671 ins_pipe( pipe_slow ); 8672 %} 8673 8674 //------------------------------------- StoreMask -------------------------------------------- 8675 8676 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8677 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8678 match(Set dst (VectorStoreMask src size)); 8679 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8680 ins_encode %{ 8681 int vlen = Matcher::vector_length(this); 8682 if (vlen <= 16 && UseAVX <= 2) { 8683 assert(UseSSE >= 3, "required"); 8684 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8685 } else { 8686 assert(UseAVX > 0, "required"); 8687 int src_vlen_enc = vector_length_encoding(this, $src); 8688 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8689 } 8690 %} 8691 ins_pipe( pipe_slow ); 8692 %} 8693 8694 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8695 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8696 match(Set dst (VectorStoreMask src size)); 8697 effect(TEMP_DEF dst, TEMP xtmp); 8698 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8699 ins_encode %{ 8700 int vlen_enc = Assembler::AVX_128bit; 8701 int vlen = Matcher::vector_length(this); 8702 if (vlen <= 8) { 8703 assert(UseSSE >= 3, "required"); 8704 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8705 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8706 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8707 } else { 8708 assert(UseAVX > 0, "required"); 8709 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8710 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8711 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8712 } 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8718 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8719 match(Set dst (VectorStoreMask src size)); 8720 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8721 effect(TEMP_DEF dst, TEMP xtmp); 8722 ins_encode %{ 8723 int vlen_enc = Assembler::AVX_128bit; 8724 int vlen = Matcher::vector_length(this); 8725 if (vlen <= 4) { 8726 assert(UseSSE >= 3, "required"); 8727 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8728 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8729 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8730 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8731 } else { 8732 assert(UseAVX > 0, "required"); 8733 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8734 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8735 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8736 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8737 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8738 } 8739 %} 8740 ins_pipe( pipe_slow ); 8741 %} 8742 8743 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8744 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8745 match(Set dst (VectorStoreMask src size)); 8746 effect(TEMP_DEF dst, TEMP xtmp); 8747 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8748 ins_encode %{ 8749 assert(UseSSE >= 3, "required"); 8750 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8751 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8752 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8753 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8754 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8755 %} 8756 ins_pipe( pipe_slow ); 8757 %} 8758 8759 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8760 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8761 match(Set dst (VectorStoreMask src size)); 8762 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8763 effect(TEMP_DEF dst, TEMP vtmp); 8764 ins_encode %{ 8765 int vlen_enc = Assembler::AVX_128bit; 8766 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8767 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8768 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8769 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8770 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8771 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8772 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8773 %} 8774 ins_pipe( pipe_slow ); 8775 %} 8776 8777 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8778 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8779 match(Set dst (VectorStoreMask src size)); 8780 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8781 ins_encode %{ 8782 int src_vlen_enc = vector_length_encoding(this, $src); 8783 int dst_vlen_enc = vector_length_encoding(this); 8784 if (!VM_Version::supports_avx512vl()) { 8785 src_vlen_enc = Assembler::AVX_512bit; 8786 } 8787 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8788 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8789 %} 8790 ins_pipe( pipe_slow ); 8791 %} 8792 8793 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8794 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8795 match(Set dst (VectorStoreMask src size)); 8796 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8797 ins_encode %{ 8798 int src_vlen_enc = vector_length_encoding(this, $src); 8799 int dst_vlen_enc = vector_length_encoding(this); 8800 if (!VM_Version::supports_avx512vl()) { 8801 src_vlen_enc = Assembler::AVX_512bit; 8802 } 8803 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8804 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8805 %} 8806 ins_pipe( pipe_slow ); 8807 %} 8808 8809 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8810 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8811 match(Set dst (VectorStoreMask mask size)); 8812 effect(TEMP_DEF dst); 8813 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8814 ins_encode %{ 8815 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8816 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8817 false, Assembler::AVX_512bit, noreg); 8818 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8819 %} 8820 ins_pipe( pipe_slow ); 8821 %} 8822 8823 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8824 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8825 match(Set dst (VectorStoreMask mask size)); 8826 effect(TEMP_DEF dst); 8827 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8828 ins_encode %{ 8829 int dst_vlen_enc = vector_length_encoding(this); 8830 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8831 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8832 %} 8833 ins_pipe( pipe_slow ); 8834 %} 8835 8836 instruct vmaskcast_evex(kReg dst) %{ 8837 match(Set dst (VectorMaskCast dst)); 8838 ins_cost(0); 8839 format %{ "vector_mask_cast $dst" %} 8840 ins_encode %{ 8841 // empty 8842 %} 8843 ins_pipe(empty); 8844 %} 8845 8846 instruct vmaskcast(vec dst) %{ 8847 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8848 match(Set dst (VectorMaskCast dst)); 8849 ins_cost(0); 8850 format %{ "vector_mask_cast $dst" %} 8851 ins_encode %{ 8852 // empty 8853 %} 8854 ins_pipe(empty); 8855 %} 8856 8857 instruct vmaskcast_avx(vec dst, vec src) %{ 8858 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8859 match(Set dst (VectorMaskCast src)); 8860 format %{ "vector_mask_cast $dst, $src" %} 8861 ins_encode %{ 8862 int vlen = Matcher::vector_length(this); 8863 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8864 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8865 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8866 %} 8867 ins_pipe(pipe_slow); 8868 %} 8869 8870 //-------------------------------- Load Iota Indices ---------------------------------- 8871 8872 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8873 match(Set dst (VectorLoadConst src)); 8874 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8875 ins_encode %{ 8876 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8877 BasicType bt = Matcher::vector_element_basic_type(this); 8878 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8879 %} 8880 ins_pipe( pipe_slow ); 8881 %} 8882 8883 #ifdef _LP64 8884 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8885 match(Set dst (PopulateIndex src1 src2)); 8886 effect(TEMP dst, TEMP vtmp); 8887 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8888 ins_encode %{ 8889 assert($src2$$constant == 1, "required"); 8890 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8891 int vlen_enc = vector_length_encoding(this); 8892 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8893 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8894 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8895 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8896 %} 8897 ins_pipe( pipe_slow ); 8898 %} 8899 8900 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8901 match(Set dst (PopulateIndex src1 src2)); 8902 effect(TEMP dst, TEMP vtmp); 8903 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8904 ins_encode %{ 8905 assert($src2$$constant == 1, "required"); 8906 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8907 int vlen_enc = vector_length_encoding(this); 8908 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8909 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8910 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8911 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8912 %} 8913 ins_pipe( pipe_slow ); 8914 %} 8915 #endif 8916 //-------------------------------- Rearrange ---------------------------------- 8917 8918 // LoadShuffle/Rearrange for Byte 8919 instruct rearrangeB(vec dst, vec shuffle) %{ 8920 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8921 Matcher::vector_length(n) < 32); 8922 match(Set dst (VectorRearrange dst shuffle)); 8923 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8924 ins_encode %{ 8925 assert(UseSSE >= 4, "required"); 8926 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8927 %} 8928 ins_pipe( pipe_slow ); 8929 %} 8930 8931 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8932 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8933 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8934 match(Set dst (VectorRearrange src shuffle)); 8935 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8936 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask, since only a
    // byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
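
// Worked example (illustration only, not generated code): for a shuffle selecting
// shorts {3, 0, 2, 1} the conversion above produces, lane by lane:
//   x2  (psllw 1)            : 6, 0, 4, 2                     (byte index of each short's low byte)
//   duplicate (psllw 8 + por): 0x0606, 0x0000, 0x0404, 0x0202
//   + vector_short_shufflemask(), presumably the repeating byte pattern {0, 1}:
//                              0x0706, 0x0100, 0x0504, 0x0302
// i.e. byte indices (6,7), (0,1), (4,5), (2,3), which is what the byte-wise
// pshufb/vpshufb in the rearrangeS rules below expects.
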
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t!
using $vtmp1, $vtmp2 as TEMP" %} 9042 ins_encode %{ 9043 assert(UseAVX >= 2, "required"); 9044 // Swap src into vtmp1 9045 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 9046 // Shuffle swapped src to get entries from other 128 bit lane 9047 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 9048 // Shuffle original src to get entries from self 128 bit lane 9049 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 9050 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 9051 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 9052 // Perform the blend 9053 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 9054 %} 9055 ins_pipe( pipe_slow ); 9056 %} 9057 9058 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 9059 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9060 VM_Version::supports_avx512bw()); 9061 match(Set dst (VectorRearrange src shuffle)); 9062 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9063 ins_encode %{ 9064 int vlen_enc = vector_length_encoding(this); 9065 if (!VM_Version::supports_avx512vl()) { 9066 vlen_enc = Assembler::AVX_512bit; 9067 } 9068 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9069 %} 9070 ins_pipe( pipe_slow ); 9071 %} 9072 9073 // LoadShuffle/Rearrange for Integer and Float 9074 9075 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 9076 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9077 Matcher::vector_length(n) == 4 && UseAVX == 0); 9078 match(Set dst (VectorLoadShuffle src)); 9079 effect(TEMP dst, TEMP vtmp); 9080 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9081 ins_encode %{ 9082 assert(UseSSE >= 4, "required"); 9083 9084 // Create a byte shuffle mask from int shuffle mask 9085 // only byte shuffle instruction available on these platforms 9086 9087 // Duplicate and multiply each shuffle by 4 9088 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 9089 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9090 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9091 __ psllw($vtmp$$XMMRegister, 2); 9092 9093 // Duplicate again to create 4 copies of byte index 9094 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 9095 __ psllw($dst$$XMMRegister, 8); 9096 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 9097 9098 // Add 3,2,1,0 to get alternate byte index 9099 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 9100 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 9101 %} 9102 ins_pipe( pipe_slow ); 9103 %} 9104 9105 instruct rearrangeI(vec dst, vec shuffle) %{ 9106 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9107 UseAVX == 0); 9108 match(Set dst (VectorRearrange dst shuffle)); 9109 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9110 ins_encode %{ 9111 assert(UseSSE >= 4, "required"); 9112 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9113 %} 9114 ins_pipe( pipe_slow ); 9115 %} 9116 9117 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 9118 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9119 UseAVX > 0); 9120 match(Set dst (VectorRearrange src shuffle)); 9121 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9122 ins_encode %{ 9123 int vlen_enc = vector_length_encoding(this); 9124 BasicType bt = Matcher::vector_element_basic_type(this); 9125 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9126 %} 9127 ins_pipe( pipe_slow ); 9128 %} 9129 9130 // LoadShuffle/Rearrange for Long and Double 9131 9132 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 9133 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9134 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9135 match(Set dst (VectorLoadShuffle src)); 9136 effect(TEMP dst, TEMP vtmp); 9137 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9138 ins_encode %{ 9139 assert(UseAVX >= 2, "required"); 9140 9141 int vlen_enc = vector_length_encoding(this); 9142 // Create a double word shuffle mask from long shuffle mask 9143 // only double word shuffle instruction available on these platforms 9144 9145 // Multiply each shuffle by two to get double word index 9146 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 9147 9148 // Duplicate each double word shuffle 9149 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9150 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9151 9152 // Add one to get alternate double word index 9153 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9154 %} 9155 ins_pipe( pipe_slow ); 9156 %} 9157 9158 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9159 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9160 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9161 match(Set dst (VectorRearrange src shuffle)); 9162 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9163 ins_encode %{ 9164 assert(UseAVX >= 2, "required"); 9165 9166 int vlen_enc = vector_length_encoding(this); 9167 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9168 %} 9169 ins_pipe( pipe_slow ); 9170 %} 9171 9172 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9173 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9174 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9175 match(Set dst (VectorRearrange src shuffle)); 9176 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9177 ins_encode %{ 9178 assert(UseAVX > 2, "required"); 9179 9180 int vlen_enc = vector_length_encoding(this); 9181 if (vlen_enc == Assembler::AVX_128bit) { 9182 vlen_enc = Assembler::AVX_256bit; 9183 } 9184 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9185 %} 9186 ins_pipe( pipe_slow ); 9187 %} 9188 9189 // --------------------------------- FMA -------------------------------------- 9190 // a * b + c 9191 9192 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9193 match(Set c (FmaVF c (Binary a b))); 9194 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9195 ins_cost(150); 9196 ins_encode %{ 9197 assert(UseFMA, "not enabled"); 9198 int vlen_enc = vector_length_encoding(this); 9199 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9205 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9206 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9207 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9208 ins_cost(150); 9209 ins_encode %{ 9210 assert(UseFMA, "not enabled"); 9211 int vlen_enc = vector_length_encoding(this); 9212 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9213 %} 9214 ins_pipe( pipe_slow ); 9215 %} 9216 9217 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9218 match(Set c (FmaVD c (Binary a b))); 9219 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9220 ins_cost(150); 9221 ins_encode %{ 9222 assert(UseFMA, "not enabled"); 9223 int vlen_enc = vector_length_encoding(this); 9224 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9225 %} 9226 ins_pipe( pipe_slow ); 9227 %} 
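
// Note (informational sketch): FmaVF/FmaVD compute a*b + c with a single rounding,
// matching the scalar fma()/Math.fma() contract, so the result can differ from a
// separate vector multiply followed by a vector add. Per lane, roughly:
//
//   c[i] = fma(a[i], b[i], c[i]);   // one rounding, not round(a[i]*b[i]) then round(+c[i])
//
// which is presumably why these rules are only reachable when UseFMA is enabled
// (see the asserts above) instead of the node being decomposed into MulV + AddV.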
9228 9229 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9230 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9231 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9232 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9233 ins_cost(150); 9234 ins_encode %{ 9235 assert(UseFMA, "not enabled"); 9236 int vlen_enc = vector_length_encoding(this); 9237 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9238 %} 9239 ins_pipe( pipe_slow ); 9240 %} 9241 9242 // --------------------------------- Vector Multiply Add -------------------------------------- 9243 9244 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9245 predicate(UseAVX == 0); 9246 match(Set dst (MulAddVS2VI dst src1)); 9247 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9248 ins_encode %{ 9249 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9250 %} 9251 ins_pipe( pipe_slow ); 9252 %} 9253 9254 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9255 predicate(UseAVX > 0); 9256 match(Set dst (MulAddVS2VI src1 src2)); 9257 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9258 ins_encode %{ 9259 int vlen_enc = vector_length_encoding(this); 9260 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9261 %} 9262 ins_pipe( pipe_slow ); 9263 %} 9264 9265 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9266 9267 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9268 predicate(VM_Version::supports_avx512_vnni()); 9269 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9270 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9271 ins_encode %{ 9272 assert(UseAVX > 2, "required"); 9273 int vlen_enc = vector_length_encoding(this); 9274 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9275 %} 9276 ins_pipe( pipe_slow ); 9277 ins_cost(10); 9278 %} 9279 9280 // --------------------------------- PopCount -------------------------------------- 9281 9282 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9283 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9284 match(Set dst (PopCountVI src)); 9285 match(Set dst (PopCountVL src)); 9286 format %{ "vector_popcount_integral $dst, $src" %} 9287 ins_encode %{ 9288 int opcode = this->ideal_Opcode(); 9289 int vlen_enc = vector_length_encoding(this, $src); 9290 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9291 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9292 %} 9293 ins_pipe( pipe_slow ); 9294 %} 9295 9296 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9297 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9298 match(Set dst (PopCountVI src mask)); 9299 match(Set dst (PopCountVL src mask)); 9300 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9301 ins_encode %{ 9302 int vlen_enc = vector_length_encoding(this, $src); 9303 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9304 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9305 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9306 %} 9307 ins_pipe( pipe_slow ); 9308 %} 9309 9310 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9311 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9312 
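  // Fallback used when is_vector_popcount_predicate() rules out a direct vector
  // popcount for this element type. The macro-assembler routine below typically
  // reduces the problem to the classic pshufb nibble lookup (illustration only;
  // the authoritative sequence is vector_popcount_integral):
  //   lut[16] = {0,1,1,2, 1,2,2,3, 1,2,2,3, 2,3,3,4};   // popcount of every 4-bit value
  //   popcount(byte b) = lut[b & 0xF] + lut[b >> 4];
  // after which the per-byte counts are widened and summed up to the element size.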
match(Set dst (PopCountVI src)); 9313 match(Set dst (PopCountVL src)); 9314 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9315 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9316 ins_encode %{ 9317 int opcode = this->ideal_Opcode(); 9318 int vlen_enc = vector_length_encoding(this, $src); 9319 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9320 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9321 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9322 %} 9323 ins_pipe( pipe_slow ); 9324 %} 9325 9326 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9327 9328 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9329 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9330 Matcher::vector_length_in_bytes(n->in(1)))); 9331 match(Set dst (CountTrailingZerosV src)); 9332 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9333 ins_cost(400); 9334 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9335 ins_encode %{ 9336 int vlen_enc = vector_length_encoding(this, $src); 9337 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9338 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9339 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9340 %} 9341 ins_pipe( pipe_slow ); 9342 %} 9343 9344 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9345 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9346 VM_Version::supports_avx512cd() && 9347 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9348 match(Set dst (CountTrailingZerosV src)); 9349 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9350 ins_cost(400); 9351 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9352 ins_encode %{ 9353 int vlen_enc = vector_length_encoding(this, $src); 9354 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9355 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9356 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9357 %} 9358 ins_pipe( pipe_slow ); 9359 %} 9360 9361 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9362 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9363 match(Set dst (CountTrailingZerosV src)); 9364 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9365 ins_cost(400); 9366 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9367 ins_encode %{ 9368 int vlen_enc = vector_length_encoding(this, $src); 9369 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9370 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9371 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9372 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9373 %} 9374 ins_pipe( pipe_slow ); 9375 %} 9376 9377 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9378 
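  // Fallback for targets without AVX512VL (see the predicate below). With no direct
  // vector tzcnt, a standard reduction is (illustration only; the exact sequence
  // lives in vector_count_trailing_zeros_avx):
  //   tzcnt(x) = popcount((x & -x) - 1)
  // since x & -x isolates the lowest set bit and subtracting 1 sets exactly the
  // bits below it; for x == 0 the same expression conveniently yields the full
  // element width.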
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9379 match(Set dst (CountTrailingZerosV src)); 9380 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9381 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9382 ins_encode %{ 9383 int vlen_enc = vector_length_encoding(this, $src); 9384 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9385 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9386 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9387 %} 9388 ins_pipe( pipe_slow ); 9389 %} 9390 9391 9392 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9393 9394 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9395 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9396 effect(TEMP dst); 9397 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9398 ins_encode %{ 9399 int vector_len = vector_length_encoding(this); 9400 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9401 %} 9402 ins_pipe( pipe_slow ); 9403 %} 9404 9405 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9406 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9407 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9408 effect(TEMP dst); 9409 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9410 ins_encode %{ 9411 int vector_len = vector_length_encoding(this); 9412 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 // --------------------------------- Rotation Operations ---------------------------------- 9418 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9419 match(Set dst (RotateLeftV src shift)); 9420 match(Set dst (RotateRightV src shift)); 9421 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9422 ins_encode %{ 9423 int opcode = this->ideal_Opcode(); 9424 int vector_len = vector_length_encoding(this); 9425 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9426 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9427 %} 9428 ins_pipe( pipe_slow ); 9429 %} 9430 9431 instruct vprorate(vec dst, vec src, vec shift) %{ 9432 match(Set dst (RotateLeftV src shift)); 9433 match(Set dst (RotateRightV src shift)); 9434 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9435 ins_encode %{ 9436 int opcode = this->ideal_Opcode(); 9437 int vector_len = vector_length_encoding(this); 9438 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9439 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9440 %} 9441 ins_pipe( pipe_slow ); 9442 %} 9443 9444 // ---------------------------------- Masked Operations ------------------------------------ 9445 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9446 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9447 match(Set dst (LoadVectorMasked mem mask)); 9448 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9449 ins_encode %{ 9450 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9451 int vlen_enc = vector_length_encoding(this); 9452 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9453 %} 9454 ins_pipe( pipe_slow ); 9455 %} 9456 9457 9458 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9459 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9460 match(Set dst (LoadVectorMasked mem mask)); 9461 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9462 ins_encode %{ 9463 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9464 int vector_len = vector_length_encoding(this); 9465 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9471 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9472 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9473 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9474 ins_encode %{ 9475 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9476 int vlen_enc = vector_length_encoding(src_node); 9477 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9478 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9479 %} 9480 ins_pipe( pipe_slow ); 9481 %} 9482 9483 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9484 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9485 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9486 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9487 ins_encode %{ 9488 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9489 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9490 int vlen_enc = vector_length_encoding(src_node); 9491 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9492 %} 9493 ins_pipe( pipe_slow ); 9494 %} 9495 9496 #ifdef _LP64 9497 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9498 match(Set addr (VerifyVectorAlignment addr mask)); 9499 effect(KILL cr); 9500 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9501 ins_encode %{ 9502 Label Lskip; 9503 // check if masked bits of addr are zero 9504 __ testq($addr$$Register, $mask$$constant); 9505 __ jccb(Assembler::equal, Lskip); 9506 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9507 __ bind(Lskip); 9508 %} 9509 ins_pipe(pipe_slow); 9510 %} 9511 9512 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9513 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9514 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9515 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9516 ins_encode %{ 9517 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9518 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9519 9520 Label DONE; 9521 int vlen_enc = vector_length_encoding(this, $src1); 9522 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9523 9524 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9525 __ mov64($dst$$Register, -1L); 9526 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9527 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9528 __ jccb(Assembler::carrySet, DONE); 9529 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9530 __ notq($dst$$Register); 9531 __ tzcntq($dst$$Register, $dst$$Register); 9532 __ bind(DONE); 9533 %} 9534 ins_pipe( pipe_slow ); 9535 %} 9536 9537 9538 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9539 match(Set dst (VectorMaskGen len)); 9540 effect(TEMP temp, KILL cr); 9541 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9542 ins_encode %{ 9543 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9544 %} 9545 ins_pipe( pipe_slow ); 9546 %} 9547 9548 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9549 match(Set dst (VectorMaskGen len)); 9550 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9551 effect(TEMP temp); 9552 ins_encode %{ 9553 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9554 __ kmovql($dst$$KRegister, $temp$$Register); 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9560 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9561 match(Set dst (VectorMaskToLong mask)); 9562 effect(TEMP dst, KILL cr); 9563 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9564 ins_encode %{ 9565 int opcode = this->ideal_Opcode(); 9566 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9567 int mask_len = Matcher::vector_length(this, $mask); 9568 int mask_size = mask_len * type2aelembytes(mbt); 9569 int vlen_enc = vector_length_encoding(this, $mask); 9570 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9571 $dst$$Register, mask_len, mask_size, vlen_enc); 9572 %} 9573 ins_pipe( pipe_slow ); 9574 %} 9575 9576 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9577 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9578 match(Set dst (VectorMaskToLong mask)); 9579 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9580 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9581 ins_encode %{ 9582 int opcode = this->ideal_Opcode(); 9583 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9584 int mask_len = Matcher::vector_length(this, $mask); 9585 int vlen_enc = vector_length_encoding(this, $mask); 9586 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9587 $dst$$Register, mask_len, mbt, vlen_enc); 9588 %} 9589 ins_pipe( pipe_slow ); 9590 %} 9591 9592 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9593 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9594 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9595 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9596 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9597 ins_encode %{ 9598 int opcode = this->ideal_Opcode(); 9599 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9600 int mask_len = Matcher::vector_length(this, $mask); 9601 int vlen_enc = vector_length_encoding(this, $mask); 9602 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9603 $dst$$Register, mask_len, mbt, vlen_enc); 9604 %} 9605 ins_pipe( pipe_slow ); 9606 %} 9607 9608 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9609 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9610 match(Set dst (VectorMaskTrueCount mask)); 9611 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9612 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9613 ins_encode %{ 9614 int opcode = this->ideal_Opcode(); 9615 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9616 int mask_len = Matcher::vector_length(this, $mask); 9617 int mask_size = mask_len * type2aelembytes(mbt); 9618 int vlen_enc = vector_length_encoding(this, $mask); 9619 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9620 $tmp$$Register, mask_len, mask_size, vlen_enc); 9621 %} 9622 ins_pipe( pipe_slow ); 9623 %} 9624 9625 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9626 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9627 match(Set dst (VectorMaskTrueCount mask)); 9628 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9629 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9630 ins_encode %{ 9631 int opcode = this->ideal_Opcode(); 9632 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9633 int mask_len = Matcher::vector_length(this, $mask); 9634 int vlen_enc = vector_length_encoding(this, $mask); 9635 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9636 $tmp$$Register, mask_len, mbt, vlen_enc); 9637 %} 9638 ins_pipe( pipe_slow ); 9639 %} 9640 9641 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9642 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9643 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9644 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9645 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9646 ins_encode %{ 9647 int opcode = this->ideal_Opcode(); 9648 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9649 int mask_len = Matcher::vector_length(this, $mask); 9650 int vlen_enc = vector_length_encoding(this, $mask); 9651 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9652 $tmp$$Register, mask_len, mbt, vlen_enc); 9653 %} 9654 ins_pipe( pipe_slow ); 9655 %} 9656 9657 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9658 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9659 match(Set dst (VectorMaskFirstTrue mask)); 9660 match(Set dst (VectorMaskLastTrue mask)); 9661 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9662 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
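// Semantics sketch (illustration only): CompressV packs the mask-selected lanes of
// src towards lane 0, ExpandV is the inverse scatter, and CompressM produces the
// prefix mask with the same number of set bits. For an 8-lane vector:
//   src  = {a, b, c, d, e, f, g, h},  mask = 0b00110010      (lanes 1, 4, 5)
//   CompressV(src, mask) = {b, e, f, 0, 0, 0, 0, 0}          (tail presumably zeroed)
//   ExpandV(src, mask)   = {0, a, 0, 0, b, c, 0, 0}          (unselected lanes presumably zeroed)
//   CompressM(mask)      = 0b00000111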
#ifdef _LP64
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t!
using $rtmp1 and $rtmp2 as TEMP" %} 9746 ins_encode %{ 9747 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9748 int mask_len = Matcher::vector_length(this); 9749 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9750 %} 9751 ins_pipe( pipe_slow ); 9752 %} 9753 9754 #endif // _LP64 9755 9756 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9757 9758 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9759 predicate(!VM_Version::supports_gfni()); 9760 match(Set dst (ReverseV src)); 9761 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9762 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9763 ins_encode %{ 9764 int vec_enc = vector_length_encoding(this); 9765 BasicType bt = Matcher::vector_element_basic_type(this); 9766 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9767 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9768 %} 9769 ins_pipe( pipe_slow ); 9770 %} 9771 9772 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9773 predicate(VM_Version::supports_gfni()); 9774 match(Set dst (ReverseV src)); 9775 effect(TEMP dst, TEMP xtmp); 9776 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9777 ins_encode %{ 9778 int vec_enc = vector_length_encoding(this); 9779 BasicType bt = Matcher::vector_element_basic_type(this); 9780 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9781 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9782 $xtmp$$XMMRegister); 9783 %} 9784 ins_pipe( pipe_slow ); 9785 %} 9786 9787 instruct vreverse_byte_reg(vec dst, vec src) %{ 9788 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9789 match(Set dst (ReverseBytesV src)); 9790 effect(TEMP dst); 9791 format %{ "vector_reverse_byte $dst, $src" %} 9792 ins_encode %{ 9793 int vec_enc = vector_length_encoding(this); 9794 BasicType bt = Matcher::vector_element_basic_type(this); 9795 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9796 %} 9797 ins_pipe( pipe_slow ); 9798 %} 9799 9800 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9801 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9802 match(Set dst (ReverseBytesV src)); 9803 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9804 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9805 ins_encode %{ 9806 int vec_enc = vector_length_encoding(this); 9807 BasicType bt = Matcher::vector_element_basic_type(this); 9808 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9809 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9810 %} 9811 ins_pipe( pipe_slow ); 9812 %} 9813 9814 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9815 9816 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9817 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9818 Matcher::vector_length_in_bytes(n->in(1)))); 9819 match(Set dst (CountLeadingZerosV src)); 9820 format %{ "vector_count_leading_zeros $dst, $src" %} 9821 ins_encode %{ 9822 int vlen_enc = vector_length_encoding(this, $src); 9823 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9824 __ 
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9825 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9826 %} 9827 ins_pipe( pipe_slow ); 9828 %} 9829 9830 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9831 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9832 Matcher::vector_length_in_bytes(n->in(1)))); 9833 match(Set dst (CountLeadingZerosV src mask)); 9834 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9835 ins_encode %{ 9836 int vlen_enc = vector_length_encoding(this, $src); 9837 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9838 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9839 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9840 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9841 %} 9842 ins_pipe( pipe_slow ); 9843 %} 9844 9845 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9846 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9847 VM_Version::supports_avx512cd() && 9848 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9849 match(Set dst (CountLeadingZerosV src)); 9850 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9851 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9852 ins_encode %{ 9853 int vlen_enc = vector_length_encoding(this, $src); 9854 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9855 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9856 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9857 %} 9858 ins_pipe( pipe_slow ); 9859 %} 9860 9861 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9862 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9863 match(Set dst (CountLeadingZerosV src)); 9864 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9865 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9866 ins_encode %{ 9867 int vlen_enc = vector_length_encoding(this, $src); 9868 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9869 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9870 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9871 $rtmp$$Register, true, vlen_enc); 9872 %} 9873 ins_pipe( pipe_slow ); 9874 %} 9875 9876 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9877 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9878 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9879 match(Set dst (CountLeadingZerosV src)); 9880 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9881 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9882 ins_encode %{ 9883 int vlen_enc = vector_length_encoding(this, $src); 9884 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9885 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9886 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9887 %} 9888 ins_pipe( pipe_slow ); 9889 %} 9890 9891 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9892 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9893 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9894 match(Set dst (CountLeadingZerosV src)); 9895 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9896 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9897 ins_encode %{ 9898 int vlen_enc = vector_length_encoding(this, $src); 9899 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9900 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9901 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9902 %} 9903 ins_pipe( pipe_slow ); 9904 %} 9905 9906 // ---------------------------------- Vector Masked Operations ------------------------------------ 9907 9908 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9909 match(Set dst (AddVB (Binary dst src2) mask)); 9910 match(Set dst (AddVS (Binary dst src2) mask)); 9911 match(Set dst (AddVI (Binary dst src2) mask)); 9912 match(Set dst (AddVL (Binary dst src2) mask)); 9913 match(Set dst (AddVF (Binary dst src2) mask)); 9914 match(Set dst (AddVD (Binary dst src2) mask)); 9915 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9916 ins_encode %{ 9917 int vlen_enc = vector_length_encoding(this); 9918 BasicType bt = Matcher::vector_element_basic_type(this); 9919 int opc = this->ideal_Opcode(); 9920 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9921 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9922 %} 9923 ins_pipe( pipe_slow ); 9924 %} 9925 9926 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9927 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9928 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9929 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9930 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9931 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9932 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9933 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9934 ins_encode %{ 9935 int vlen_enc = vector_length_encoding(this); 9936 BasicType bt = Matcher::vector_element_basic_type(this); 9937 int opc = this->ideal_Opcode(); 9938 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9939 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9940 %} 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9945 match(Set dst (XorV (Binary dst src2) mask)); 9946 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9947 ins_encode %{ 9948 int vlen_enc = vector_length_encoding(this); 9949 BasicType bt = Matcher::vector_element_basic_type(this); 9950 int opc = this->ideal_Opcode(); 9951 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9952 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9953 %} 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9958 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9959 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9960 ins_encode %{ 9961 int vlen_enc = vector_length_encoding(this); 9962 BasicType bt = Matcher::vector_element_basic_type(this); 9963 int opc = this->ideal_Opcode(); 9964 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9965 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9966 %} 9967 ins_pipe( pipe_slow ); 9968 %} 9969 9970 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9971 match(Set dst (OrV (Binary dst src2) mask)); 9972 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9973 ins_encode %{ 9974 int vlen_enc = vector_length_encoding(this); 9975 BasicType bt = Matcher::vector_element_basic_type(this); 9976 int opc = this->ideal_Opcode(); 9977 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9978 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9979 %} 9980 ins_pipe( pipe_slow ); 9981 %} 9982 9983 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9984 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9985 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9986 ins_encode %{ 9987 int vlen_enc = vector_length_encoding(this); 9988 BasicType bt = Matcher::vector_element_basic_type(this); 9989 int opc = this->ideal_Opcode(); 9990 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9991 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9992 %} 9993 ins_pipe( pipe_slow ); 9994 %} 9995 9996 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9997 match(Set dst (AndV (Binary dst src2) mask)); 9998 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9999 ins_encode %{ 10000 int vlen_enc = vector_length_encoding(this); 10001 BasicType bt = Matcher::vector_element_basic_type(this); 10002 int opc = this->ideal_Opcode(); 10003 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10004 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10005 %} 10006 ins_pipe( pipe_slow ); 10007 %} 10008 10009 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 10010 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 10011 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 10012 ins_encode %{ 10013 int vlen_enc = vector_length_encoding(this); 10014 BasicType bt = Matcher::vector_element_basic_type(this); 10015 int opc = this->ideal_Opcode(); 10016 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10017 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10018 %} 10019 ins_pipe( pipe_slow ); 10020 %} 10021 10022 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 10023 match(Set dst (SubVB (Binary dst src2) mask)); 10024 match(Set dst (SubVS (Binary dst src2) mask)); 10025 match(Set dst (SubVI (Binary dst src2) mask)); 10026 match(Set dst (SubVL (Binary dst src2) mask)); 10027 match(Set dst (SubVF (Binary dst src2) mask)); 10028 match(Set dst (SubVD (Binary dst src2) mask)); 10029 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 10030 ins_encode %{ 10031 int vlen_enc = vector_length_encoding(this); 10032 BasicType bt = Matcher::vector_element_basic_type(this); 10033 int opc = this->ideal_Opcode(); 10034 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10035 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10036 %} 10037 ins_pipe( pipe_slow ); 10038 %} 10039 10040 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 10041 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 10042 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 10043 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 10044 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 10045 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 10046 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 10047 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 10048 ins_encode %{ 10049 int vlen_enc = vector_length_encoding(this); 10050 BasicType bt = Matcher::vector_element_basic_type(this); 10051 int opc = this->ideal_Opcode(); 10052 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10053 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10054 %} 10055 ins_pipe( pipe_slow ); 10056 %} 10057 10058 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 10059 match(Set dst (MulVS (Binary dst src2) mask)); 10060 match(Set dst (MulVI (Binary dst src2) mask)); 10061 match(Set dst (MulVL (Binary dst src2) mask)); 10062 match(Set dst (MulVF (Binary dst src2) mask)); 10063 match(Set dst (MulVD (Binary dst src2) mask)); 10064 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10065 ins_encode %{ 10066 int vlen_enc = vector_length_encoding(this); 10067 BasicType bt = Matcher::vector_element_basic_type(this); 10068 int opc = this->ideal_Opcode(); 10069 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10070 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10071 %} 10072 ins_pipe( pipe_slow ); 10073 %} 10074 10075 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 10076 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 10077 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 10078 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 10079 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 10080 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 10081 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 10082 ins_encode %{ 10083 int vlen_enc = vector_length_encoding(this); 10084 BasicType bt = Matcher::vector_element_basic_type(this); 10085 int opc = this->ideal_Opcode(); 10086 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10087 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10088 %} 10089 ins_pipe( pipe_slow ); 10090 %} 10091 10092 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 10093 match(Set dst (SqrtVF dst mask)); 10094 match(Set dst (SqrtVD dst mask)); 10095 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 10096 ins_encode %{ 10097 int vlen_enc = vector_length_encoding(this); 10098 BasicType bt = Matcher::vector_element_basic_type(this); 10099 int opc = this->ideal_Opcode(); 10100 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10101 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10102 %} 10103 ins_pipe( pipe_slow ); 10104 %} 10105 10106 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 10107 match(Set dst (DivVF (Binary dst src2) mask)); 10108 match(Set dst (DivVD (Binary dst src2) mask)); 10109 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10110 ins_encode %{ 10111 int vlen_enc = vector_length_encoding(this); 10112 BasicType bt = Matcher::vector_element_basic_type(this); 10113 int opc = this->ideal_Opcode(); 10114 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10115 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10116 %} 10117 ins_pipe( pipe_slow ); 10118 %} 10119 10120 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 10121 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 10122 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 10123 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 10124 ins_encode %{ 10125 int vlen_enc = vector_length_encoding(this); 10126 BasicType bt = Matcher::vector_element_basic_type(this); 10127 int opc = this->ideal_Opcode(); 10128 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10129 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10130 %} 10131 ins_pipe( pipe_slow ); 10132 %} 10133 10134 10135 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10136 match(Set dst (RotateLeftV (Binary dst shift) mask)); 10137 match(Set dst (RotateRightV (Binary dst shift) mask)); 10138 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 10139 ins_encode %{ 10140 int vlen_enc = vector_length_encoding(this); 10141 BasicType bt = Matcher::vector_element_basic_type(this); 10142 int opc = this->ideal_Opcode(); 10143 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10144 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10145 %} 10146 ins_pipe( pipe_slow ); 10147 %} 10148 10149 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 10150 match(Set dst (RotateLeftV (Binary dst src2) mask)); 10151 match(Set dst (RotateRightV (Binary dst src2) mask)); 10152 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 10153 ins_encode %{ 10154 int vlen_enc = vector_length_encoding(this); 10155 BasicType bt = Matcher::vector_element_basic_type(this); 10156 int opc = this->ideal_Opcode(); 10157 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10158 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10159 %} 10160 ins_pipe( pipe_slow ); 10161 %} 10162 10163 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10164 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 10165 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 10166 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 10167 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 10168 ins_encode %{ 10169 int vlen_enc = vector_length_encoding(this); 10170 BasicType bt = Matcher::vector_element_basic_type(this); 10171 int opc = this->ideal_Opcode(); 10172 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10173 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10174 %} 10175 ins_pipe( pipe_slow ); 10176 %} 10177 10178 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10179 predicate(!n->as_ShiftV()->is_var_shift()); 10180 match(Set dst (LShiftVS (Binary dst src2) mask)); 10181 match(Set dst (LShiftVI (Binary dst src2) mask)); 10182 match(Set dst (LShiftVL (Binary dst src2) mask)); 10183 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10184 ins_encode %{ 10185 int vlen_enc = vector_length_encoding(this); 10186 BasicType bt = Matcher::vector_element_basic_type(this); 10187 int opc = this->ideal_Opcode(); 10188 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10189 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10190 %} 10191 ins_pipe( pipe_slow ); 10192 %} 10193 10194 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10195 predicate(n->as_ShiftV()->is_var_shift()); 10196 match(Set dst (LShiftVS (Binary dst src2) mask)); 10197 match(Set dst (LShiftVI (Binary dst src2) mask)); 10198 match(Set dst (LShiftVL (Binary dst src2) mask)); 10199 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10200 ins_encode %{ 10201 int vlen_enc = vector_length_encoding(this); 10202 BasicType bt = Matcher::vector_element_basic_type(this); 10203 int opc = this->ideal_Opcode(); 10204 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10205 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10206 %} 10207 ins_pipe( pipe_slow ); 10208 %} 10209 10210 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10211 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10212 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10213 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10214 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10215 ins_encode %{ 10216 int vlen_enc = vector_length_encoding(this); 10217 BasicType bt = Matcher::vector_element_basic_type(this); 10218 int opc = this->ideal_Opcode(); 10219 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10220 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10221 %} 10222 ins_pipe( pipe_slow ); 10223 %} 10224 10225 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10226 predicate(!n->as_ShiftV()->is_var_shift()); 10227 match(Set dst (RShiftVS (Binary dst src2) mask)); 10228 match(Set dst (RShiftVI (Binary dst src2) mask)); 10229 match(Set dst (RShiftVL (Binary dst src2) mask)); 10230 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10231 ins_encode %{ 10232 int vlen_enc = vector_length_encoding(this); 10233 BasicType bt = Matcher::vector_element_basic_type(this); 10234 int opc = this->ideal_Opcode(); 10235 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10236 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10237 %} 10238 ins_pipe( pipe_slow ); 10239 %} 10240 10241 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10242 predicate(n->as_ShiftV()->is_var_shift()); 10243 match(Set dst (RShiftVS (Binary dst src2) mask)); 10244 match(Set dst (RShiftVI (Binary dst src2) mask)); 10245 match(Set dst (RShiftVL (Binary dst src2) mask)); 10246 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10247 ins_encode %{ 10248 int vlen_enc = vector_length_encoding(this); 10249 BasicType bt = Matcher::vector_element_basic_type(this); 10250 int opc = this->ideal_Opcode(); 10251 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10252 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10253 %} 10254 ins_pipe( pipe_slow ); 10255 %} 10256 10257 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10258 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10259 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10260 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10261 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 10262 ins_encode %{ 10263 int vlen_enc = vector_length_encoding(this); 10264 BasicType bt = Matcher::vector_element_basic_type(this); 10265 int opc = this->ideal_Opcode(); 10266 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10267 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10268 %} 10269 ins_pipe( pipe_slow ); 10270 %} 10271 10272 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10273 predicate(!n->as_ShiftV()->is_var_shift()); 10274 match(Set dst (URShiftVS (Binary dst src2) mask)); 10275 match(Set dst (URShiftVI (Binary dst src2) mask)); 10276 match(Set dst (URShiftVL (Binary dst src2) mask)); 10277 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10278 ins_encode %{ 10279 int vlen_enc = vector_length_encoding(this); 10280 BasicType bt = Matcher::vector_element_basic_type(this); 10281 int opc = this->ideal_Opcode(); 10282 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10283 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10284 %} 10285 ins_pipe( pipe_slow ); 10286 %} 10287 10288 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10289 predicate(n->as_ShiftV()->is_var_shift()); 10290 match(Set dst (URShiftVS (Binary dst src2) mask)); 10291 match(Set dst (URShiftVI (Binary dst src2) mask)); 10292 match(Set dst (URShiftVL (Binary dst src2) mask)); 10293 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10294 ins_encode %{ 10295 int vlen_enc = vector_length_encoding(this); 10296 BasicType bt = Matcher::vector_element_basic_type(this); 10297 int opc = this->ideal_Opcode(); 10298 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10299 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10300 %} 10301 ins_pipe( pipe_slow ); 10302 %} 10303 10304 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10305 match(Set dst (MaxV (Binary dst src2) mask)); 10306 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 10307 ins_encode %{ 10308 int vlen_enc = vector_length_encoding(this); 10309 BasicType bt = Matcher::vector_element_basic_type(this); 10310 int opc = this->ideal_Opcode(); 10311 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10312 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10313 %} 10314 ins_pipe( pipe_slow ); 10315 %} 10316 10317 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10318 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10319 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10320 ins_encode %{ 10321 int vlen_enc = vector_length_encoding(this); 10322 BasicType bt = Matcher::vector_element_basic_type(this); 10323 int opc = this->ideal_Opcode(); 10324 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10325 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10326 %} 10327 ins_pipe( pipe_slow ); 10328 %} 10329 10330 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10331 match(Set dst (MinV (Binary dst src2) mask)); 10332 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10333 ins_encode %{ 10334 int vlen_enc = vector_length_encoding(this); 10335 BasicType bt = Matcher::vector_element_basic_type(this); 10336 int opc = this->ideal_Opcode(); 10337 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10338 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10339 %} 10340 ins_pipe( pipe_slow ); 10341 %} 10342 10343 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10344 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10345 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10346 ins_encode %{ 10347 int vlen_enc = vector_length_encoding(this); 10348 BasicType bt = Matcher::vector_element_basic_type(this); 10349 int opc = this->ideal_Opcode(); 10350 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10351 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10352 %} 10353 ins_pipe( pipe_slow ); 10354 %} 10355 10356 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10357 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10358 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10359 ins_encode %{ 10360 int vlen_enc = vector_length_encoding(this); 10361 BasicType bt = Matcher::vector_element_basic_type(this); 10362 int opc = this->ideal_Opcode(); 10363 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10364 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10365 %} 10366 ins_pipe( pipe_slow ); 10367 %} 10368 10369 instruct vabs_masked(vec dst, kReg mask) %{ 10370 match(Set dst (AbsVB dst mask)); 10371 match(Set dst (AbsVS dst mask)); 10372 match(Set dst (AbsVI dst mask)); 10373 match(Set dst (AbsVL dst mask)); 10374 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 10375 ins_encode %{ 10376 int vlen_enc = vector_length_encoding(this); 10377 BasicType bt = Matcher::vector_element_basic_type(this); 10378 int opc = this->ideal_Opcode(); 10379 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10380 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10381 %} 10382 ins_pipe( pipe_slow ); 10383 %} 10384 10385 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10386 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10387 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10388 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 10389 ins_encode %{ 10390 assert(UseFMA, "Needs FMA instructions support."); 10391 int vlen_enc = vector_length_encoding(this); 10392 BasicType bt = Matcher::vector_element_basic_type(this); 10393 int opc = this->ideal_Opcode(); 10394 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10395 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 10396 %} 10397 ins_pipe( pipe_slow ); 10398 %} 10399 10400 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 10401 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 10402 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 10403 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10404 ins_encode %{ 10405 assert(UseFMA, "Needs FMA instructions support."); 10406 int vlen_enc = vector_length_encoding(this); 10407 BasicType bt = Matcher::vector_element_basic_type(this); 10408 int opc = this->ideal_Opcode(); 10409 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10410 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 10411 %} 10412 ins_pipe( pipe_slow ); 10413 %} 10414 10415 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10416 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10417 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10418 ins_encode %{ 10419 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10420 int vlen_enc = vector_length_encoding(this, $src1); 10421 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10422 10423 // Comparison i 10424 switch (src1_elem_bt) { 10425 case T_BYTE: { 10426 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10427 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10428 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10429 break; 10430 } 10431 case T_SHORT: { 10432 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10433 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10434 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10435 break; 10436 } 10437 case T_INT: { 10438 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10439 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10440 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10441 break; 10442 } 10443 case T_LONG: { 10444 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10445 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10446 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10447 break; 10448 } 10449 case T_FLOAT: { 10450 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10451 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10452 break; 10453 } 10454 case T_DOUBLE: { 10455 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10456 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10457 break; 10458 } 10459 default: assert(false, 
"%s", type2name(src1_elem_bt)); break; 10460 } 10461 %} 10462 ins_pipe( pipe_slow ); 10463 %} 10464 10465 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10466 predicate(Matcher::vector_length(n) <= 32); 10467 match(Set dst (MaskAll src)); 10468 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10469 ins_encode %{ 10470 int mask_len = Matcher::vector_length(this); 10471 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10472 %} 10473 ins_pipe( pipe_slow ); 10474 %} 10475 10476 #ifdef _LP64 10477 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10478 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10479 match(Set dst (XorVMask src (MaskAll cnt))); 10480 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10481 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10482 ins_encode %{ 10483 uint masklen = Matcher::vector_length(this); 10484 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10485 %} 10486 ins_pipe( pipe_slow ); 10487 %} 10488 10489 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10490 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10491 (Matcher::vector_length(n) == 16) || 10492 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10493 match(Set dst (XorVMask src (MaskAll cnt))); 10494 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10495 ins_encode %{ 10496 uint masklen = Matcher::vector_length(this); 10497 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10498 %} 10499 ins_pipe( pipe_slow ); 10500 %} 10501 10502 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10503 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10504 match(Set dst (VectorLongToMask src)); 10505 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10506 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10507 ins_encode %{ 10508 int mask_len = Matcher::vector_length(this); 10509 int vec_enc = vector_length_encoding(mask_len); 10510 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10511 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10512 %} 10513 ins_pipe( pipe_slow ); 10514 %} 10515 10516 10517 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10518 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10519 match(Set dst (VectorLongToMask src)); 10520 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10521 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10522 ins_encode %{ 10523 int mask_len = Matcher::vector_length(this); 10524 assert(mask_len <= 32, "invalid mask length"); 10525 int vec_enc = vector_length_encoding(mask_len); 10526 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10527 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10528 %} 10529 ins_pipe( pipe_slow ); 10530 %} 10531 10532 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10533 predicate(n->bottom_type()->isa_vectmask()); 10534 match(Set dst (VectorLongToMask src)); 10535 format %{ "long_to_mask_evex $dst, $src\t!" 
%} 10536 ins_encode %{ 10537 __ kmov($dst$$KRegister, $src$$Register); 10538 %} 10539 ins_pipe( pipe_slow ); 10540 %} 10541 #endif 10542 10543 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10544 match(Set dst (AndVMask src1 src2)); 10545 match(Set dst (OrVMask src1 src2)); 10546 match(Set dst (XorVMask src1 src2)); 10547 effect(TEMP kscratch); 10548 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10549 ins_encode %{ 10550 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10551 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10552 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10553 uint masklen = Matcher::vector_length(this); 10554 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10555 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10556 %} 10557 ins_pipe( pipe_slow ); 10558 %} 10559 10560 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10561 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10562 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10563 ins_encode %{ 10564 int vlen_enc = vector_length_encoding(this); 10565 BasicType bt = Matcher::vector_element_basic_type(this); 10566 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10567 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10568 %} 10569 ins_pipe( pipe_slow ); 10570 %} 10571 10572 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10573 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10574 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10575 ins_encode %{ 10576 int vlen_enc = vector_length_encoding(this); 10577 BasicType bt = Matcher::vector_element_basic_type(this); 10578 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10579 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10580 %} 10581 ins_pipe( pipe_slow ); 10582 %} 10583 10584 instruct castMM(kReg dst) 10585 %{ 10586 match(Set dst (CastVV dst)); 10587 10588 size(0); 10589 format %{ "# castVV of $dst" %} 10590 ins_encode(/* empty encoding */); 10591 ins_cost(0); 10592 ins_pipe(empty); 10593 %} 10594 10595 instruct castVV(vec dst) 10596 %{ 10597 match(Set dst (CastVV dst)); 10598 10599 size(0); 10600 format %{ "# castVV of $dst" %} 10601 ins_encode(/* empty encoding */); 10602 ins_cost(0); 10603 ins_pipe(empty); 10604 %} 10605 10606 instruct castVVLeg(legVec dst) 10607 %{ 10608 match(Set dst (CastVV dst)); 10609 10610 size(0); 10611 format %{ "# castVV of $dst" %} 10612 ins_encode(/* empty encoding */); 10613 ins_cost(0); 10614 ins_pipe(empty); 10615 %} 10616 10617 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10618 %{ 10619 match(Set dst (IsInfiniteF src)); 10620 effect(TEMP ktmp, KILL cr); 10621 format %{ "float_class_check $dst, $src" %} 10622 ins_encode %{ 10623 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10624 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10625 %} 10626 ins_pipe(pipe_slow); 10627 %} 10628 10629 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10630 %{ 10631 match(Set dst (IsInfiniteD src)); 10632 effect(TEMP ktmp, KILL cr); 10633 format %{ "double_class_check $dst, $src" %} 10634 ins_encode %{ 10635 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10636 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10637 %} 10638 ins_pipe(pipe_slow); 10639 %} 10640 10641 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10642 %{ 10643 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10644 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10645 match(Set dst (SaturatingAddV src1 src2)); 10646 match(Set dst (SaturatingSubV src1 src2)); 10647 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10648 ins_encode %{ 10649 int vlen_enc = vector_length_encoding(this); 10650 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10651 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10652 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10653 %} 10654 ins_pipe(pipe_slow); 10655 %} 10656 10657 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10658 %{ 10659 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10660 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10661 match(Set dst (SaturatingAddV src1 src2)); 10662 match(Set dst (SaturatingSubV src1 src2)); 10663 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10664 ins_encode %{ 10665 int vlen_enc = vector_length_encoding(this); 10666 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10667 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10668 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10669 %} 10670 ins_pipe(pipe_slow); 10671 %} 10672 10673 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10674 %{ 10675 
predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10676 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10677 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10678 match(Set dst (SaturatingAddV src1 src2)); 10679 match(Set dst (SaturatingSubV src1 src2)); 10680 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10681 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10682 ins_encode %{ 10683 int vlen_enc = vector_length_encoding(this); 10684 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10685 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10686 $src1$$XMMRegister, $src2$$XMMRegister, 10687 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10688 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10689 %} 10690 ins_pipe(pipe_slow); 10691 %} 10692 10693 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10694 %{ 10695 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10696 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10697 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10698 match(Set dst (SaturatingAddV src1 src2)); 10699 match(Set dst (SaturatingSubV src1 src2)); 10700 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10701 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 10702 ins_encode %{ 10703 int vlen_enc = vector_length_encoding(this); 10704 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10705 __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10706 $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10707 $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc); 10708 %} 10709 ins_pipe(pipe_slow); 10710 %} 10711 10712 instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp) 10713 %{ 10714 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10715 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10716 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10717 match(Set dst (SaturatingAddV src1 src2)); 10718 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp); 10719 format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2 and $ktmp as TEMP" %} 10720 ins_encode %{ 10721 int vlen_enc = vector_length_encoding(this); 10722 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10723 __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10724 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10725 %} 10726 ins_pipe(pipe_slow); 10727 %} 10728 10729 instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3) 10730 %{ 10731 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10732 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10733 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10734 match(Set dst (SaturatingAddV src1 src2)); 10735 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 10736 format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 10737 ins_encode %{ 10738 int vlen_enc = vector_length_encoding(this); 10739 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10740 __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10741 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc); 10742 %} 10743 ins_pipe(pipe_slow); 10744 %} 10745 10746 instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp) 10747 %{ 10748 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10749 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10750 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10751 match(Set dst (SaturatingSubV src1 src2)); 10752 effect(TEMP ktmp); 10753 format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %} 10754 ins_encode %{ 10755 int vlen_enc = vector_length_encoding(this); 10756 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10757 __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, 10758 $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc); 10759 %} 10760 ins_pipe(pipe_slow); 10761 %} 10762 10763 instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) 10764 %{ 10765 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10766 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() && 10767 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10768 match(Set dst (SaturatingSubV src1 src2)); 10769 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 10770 format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! 
using $xtmp1 and $xtmp2 as TEMP" %} 10771 ins_encode %{ 10772 int vlen_enc = vector_length_encoding(this); 10773 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10774 __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, 10775 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 10776 %} 10777 ins_pipe(pipe_slow); 10778 %} 10779 10780 instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2) 10781 %{ 10782 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10783 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10784 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10785 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10786 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10787 ins_encode %{ 10788 int vlen_enc = vector_length_encoding(this); 10789 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10790 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10791 $src1$$XMMRegister, $src2$$Address, false, vlen_enc); 10792 %} 10793 ins_pipe(pipe_slow); 10794 %} 10795 10796 instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2) 10797 %{ 10798 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10799 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10800 match(Set dst (SaturatingAddV src1 (LoadVector src2))); 10801 match(Set dst (SaturatingSubV src1 (LoadVector src2))); 10802 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10803 ins_encode %{ 10804 int vlen_enc = vector_length_encoding(this); 10805 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10806 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10807 $src1$$XMMRegister, $src2$$Address, true, vlen_enc); 10808 %} 10809 ins_pipe(pipe_slow); 10810 %} 10811 10812 instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10813 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10814 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10815 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10816 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10817 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10818 ins_encode %{ 10819 int vlen_enc = vector_length_encoding(this); 10820 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10821 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10822 $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc); 10823 %} 10824 ins_pipe( pipe_slow ); 10825 %} 10826 10827 instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{ 10828 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10829 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10830 match(Set dst (SaturatingAddV (Binary dst src) mask)); 10831 match(Set dst (SaturatingSubV (Binary dst src) mask)); 10832 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10833 ins_encode %{ 10834 int vlen_enc = vector_length_encoding(this); 10835 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10836 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10837 $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc); 10838 %} 10839 ins_pipe( 
pipe_slow ); 10840 %} 10841 10842 instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10843 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10844 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10845 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10846 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10847 format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %} 10848 ins_encode %{ 10849 int vlen_enc = vector_length_encoding(this); 10850 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10851 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10852 $dst$$XMMRegister, $src$$Address, false, true, vlen_enc); 10853 %} 10854 ins_pipe( pipe_slow ); 10855 %} 10856 10857 instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{ 10858 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10859 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10860 match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask)); 10861 match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask)); 10862 format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %} 10863 ins_encode %{ 10864 int vlen_enc = vector_length_encoding(this); 10865 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10866 __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister, 10867 $dst$$XMMRegister, $src$$Address, true, true, vlen_enc); 10868 %} 10869 ins_pipe( pipe_slow ); 10870 %} 10871 10872 instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2) 10873 %{ 10874 match(Set index (SelectFromTwoVector (Binary index src1) src2)); 10875 format %{ "select_from_two_vector $index, $src1, $src2 \t!" 
%} 10876 ins_encode %{ 10877 int vlen_enc = vector_length_encoding(this); 10878 BasicType bt = Matcher::vector_element_basic_type(this); 10879 __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 10880 %} 10881 ins_pipe(pipe_slow); 10882 %} 10883 10884 instruct reinterpretS2HF(regF dst, rRegI src) 10885 %{ 10886 match(Set dst (ReinterpretS2HF src)); 10887 format %{ "vmovw $dst, $src" %} 10888 ins_encode %{ 10889 __ vmovw($dst$$XMMRegister, $src$$Register); 10890 %} 10891 ins_pipe(pipe_slow); 10892 %} 10893 10894 instruct convF2HFAndS2HF(regF dst, regF src) 10895 %{ 10896 match(Set dst (ReinterpretS2HF (ConvF2HF src))); 10897 format %{ "convF2HFAndS2HF $dst, $src" %} 10898 ins_encode %{ 10899 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 10900 %} 10901 ins_pipe(pipe_slow); 10902 %} 10903 10904 instruct convHF2SAndHF2F(regF dst, regF src) 10905 %{ 10906 match(Set dst (ConvHF2F (ReinterpretHF2S src))); 10907 format %{ "convHF2SAndHF2F $dst, $src" %} 10908 ins_encode %{ 10909 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit); 10910 %} 10911 ins_pipe(pipe_slow); 10912 %} 10913 10914 instruct reinterpretHF2S(rRegI dst, regF src) 10915 %{ 10916 match(Set dst (ReinterpretHF2S src)); 10917 format %{ "vmovw $dst, $src" %} 10918 ins_encode %{ 10919 __ vmovw($dst$$Register, $src$$XMMRegister); 10920 %} 10921 ins_pipe(pipe_slow); 10922 %} 10923 10924 instruct scalar_sqrt_HF_reg(regF dst, regF src) 10925 %{ 10926 match(Set dst (SqrtHF src)); 10927 format %{ "scalar_sqrt_fp16 $dst, $src" %} 10928 ins_encode %{ 10929 __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister); 10930 %} 10931 ins_pipe(pipe_slow); 10932 %} 10933 10934 instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2) 10935 %{ 10936 match(Set dst (AddHF src1 src2)); 10937 match(Set dst (DivHF src1 src2)); 10938 match(Set dst (MaxHF src1 src2)); 10939 match(Set dst (MinHF src1 src2)); 10940 match(Set dst (MulHF src1 src2)); 10941 match(Set dst (SubHF src1 src2)); 10942 format %{ "scalar_binop_fp16 $dst, $src1, $src2" %} 10943 ins_encode %{ 10944 int opcode = this->ideal_Opcode(); 10945 __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 10946 %} 10947 ins_pipe(pipe_slow); 10948 %} 10949 10950 instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2) 10951 %{ 10952 match(Set dst (FmaHF src2 (Binary dst src1))); 10953 effect(DEF dst); 10954 format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %} 10955 ins_encode %{ 10956 __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister); 10957 %} 10958 ins_pipe( pipe_slow ); 10959 %}