//
// Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// The Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//    XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX-enabled architectures:
//    XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
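//
// For orientation, the first definition below decomposes as follows; this
// breakdown is purely illustrative and does not add a register:
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//           name   |    |      |      |  concrete VMReg handle used by the runtime
//                  |    |      |      +- encoding placed into the opcodes
//                  |    |      +-------- ideal register type used for spill/fill
//                  |    +--------------- C calling convention save type
//                  +-------------------- register allocator save type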
//
// Linux ABI:   No registers are preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 are preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3,
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 214 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 215 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 216 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 217 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 218 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 219 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 220 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 221 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 222 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 223 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 224 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 225 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 226 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 227 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 228 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 229 230 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 231 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 232 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 233 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 234 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 235 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 236 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 237 reg_def XMM9h( SOC, SOC, Op_RegF, 9, 
xmm9->as_VMReg()->next(7)); 238 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 239 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 240 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 241 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 242 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 243 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 244 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 245 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 246 247 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 248 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 249 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 250 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 251 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 252 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 253 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 254 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 255 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 256 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 257 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 258 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 259 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 260 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 261 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 262 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 263 264 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 265 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 266 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 267 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 268 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 269 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 270 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 271 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 272 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 273 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 274 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 275 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 276 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 277 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 278 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 279 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 280 281 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 282 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 283 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 284 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 285 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 286 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 287 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 288 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 289 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 290 reg_def XMM12j( 
SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 291 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 292 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 293 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 294 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 295 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 296 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 297 298 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 299 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 300 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 301 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 302 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 303 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 304 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 305 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 306 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 307 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 308 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 309 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 310 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 311 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 312 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 313 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 314 315 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 316 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 317 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 318 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 319 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 320 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 321 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 322 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 323 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 324 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 325 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 326 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 327 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 328 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 329 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 330 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 331 332 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 333 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 334 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 335 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 336 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 337 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 338 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 339 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 340 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 341 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 342 reg_def XMM15k( SOC, SOC, Op_RegF, 15, 
xmm15->as_VMReg()->next(10)); 343 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 344 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 345 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 346 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 347 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 348 349 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 350 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 351 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 352 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 353 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 354 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 355 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 356 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 357 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 358 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 359 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 360 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 361 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 362 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 363 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 364 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 365 366 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 367 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 368 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 369 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 370 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 371 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 372 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 373 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 374 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 375 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 376 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 377 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 378 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 379 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 380 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 381 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 382 383 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 384 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 385 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 386 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 387 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 388 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 389 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 390 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 391 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 392 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 393 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 394 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 395 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 396 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 397 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 398 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 399 400 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 401 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 402 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 403 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 404 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 405 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 406 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 407 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 408 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 409 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 410 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 411 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 412 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 413 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 414 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 415 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 416 417 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 418 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 419 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 420 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 421 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 422 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 423 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 424 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 425 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 426 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 427 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 428 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 429 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 430 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 431 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 432 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 433 434 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 435 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 436 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 437 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 438 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 439 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 440 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 441 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 442 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 443 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 444 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 445 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 446 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 447 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 448 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 449 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 450 451 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 452 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 453 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 454 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 455 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 456 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 457 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 458 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 459 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 460 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 461 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 462 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 463 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 464 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 465 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 466 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 467 468 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 469 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 470 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 471 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 472 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 473 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 474 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 475 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 476 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 477 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 478 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 479 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 480 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 481 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 482 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 483 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 484 485 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 486 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 487 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 488 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 489 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 490 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 491 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 492 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 493 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 494 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 495 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 496 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 497 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 498 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 499 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 500 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 501 502 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 503 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 504 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 505 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 506 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 507 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 508 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 509 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 510 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 511 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 512 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 513 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 514 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 515 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 516 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 517 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 518 519 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 520 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 521 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 522 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 523 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 524 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 525 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 526 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 527 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 528 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 529 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 530 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 531 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 532 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 533 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 534 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 535 536 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 537 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 538 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 539 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 540 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 541 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 542 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 543 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 544 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 545 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 546 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 547 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 548 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 549 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 550 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 551 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 552 553 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 554 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 555 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 556 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 557 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 558 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 559 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 560 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 561 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 562 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 563 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 564 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 565 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 566 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 567 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 568 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 569 570 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 571 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 572 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 573 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 574 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 575 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 576 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 577 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 578 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 579 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 580 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 581 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 582 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 583 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 584 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 585 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 586 587 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 588 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 589 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 590 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 591 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 592 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 593 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 594 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 595 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 596 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 597 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 598 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 599 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 600 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 601 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 602 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 603 604 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 605 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 606 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 607 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 608 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 609 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 610 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 611 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 612 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 613 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 614 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 615 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 616 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 617 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 618 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 619 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 620 621 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 622 623 // AVX3 Mask Registers. 624 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 625 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 626 627 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 628 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 629 630 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 631 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 632 633 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 634 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 635 636 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 637 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 638 639 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 640 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 641 642 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 643 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 644 645 646 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 647 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 648 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 649 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 650 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 651 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 652 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 653 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 654 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 655 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 656 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 657 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, 
XMM11n, XMM11o, XMM11p, 658 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 659 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 660 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 661 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 662 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 663 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 664 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 665 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 666 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 667 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 668 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 669 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 670 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 671 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 672 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 673 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 674 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 675 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 676 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 677 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 678 679 alloc_class chunk2(K7, K7_H, 680 K6, K6_H, 681 K5, K5_H, 682 K4, K4_H, 683 K3, K3_H, 684 K2, K2_H, 685 K1, K1_H); 686 687 reg_class vectmask_reg(K1, K1_H, 688 K2, K2_H, 689 K3, K3_H, 690 K4, K4_H, 691 K5, K5_H, 692 K6, K6_H, 693 K7, K7_H); 694 695 reg_class vectmask_reg_K1(K1, K1_H); 696 reg_class vectmask_reg_K2(K2, K2_H); 697 reg_class vectmask_reg_K3(K3, K3_H); 698 reg_class vectmask_reg_K4(K4, K4_H); 699 reg_class vectmask_reg_K5(K5, K5_H); 700 reg_class vectmask_reg_K6(K6, K6_H); 701 reg_class vectmask_reg_K7(K7, K7_H); 702 703 // flags allocation class should be last. 
704 alloc_class chunk3(RFLAGS); 705 706 707 // Singleton class for condition codes 708 reg_class int_flags(RFLAGS); 709 710 // Class for pre evex float registers 711 reg_class float_reg_legacy(XMM0, 712 XMM1, 713 XMM2, 714 XMM3, 715 XMM4, 716 XMM5, 717 XMM6, 718 XMM7, 719 XMM8, 720 XMM9, 721 XMM10, 722 XMM11, 723 XMM12, 724 XMM13, 725 XMM14, 726 XMM15); 727 728 // Class for evex float registers 729 reg_class float_reg_evex(XMM0, 730 XMM1, 731 XMM2, 732 XMM3, 733 XMM4, 734 XMM5, 735 XMM6, 736 XMM7, 737 XMM8, 738 XMM9, 739 XMM10, 740 XMM11, 741 XMM12, 742 XMM13, 743 XMM14, 744 XMM15, 745 XMM16, 746 XMM17, 747 XMM18, 748 XMM19, 749 XMM20, 750 XMM21, 751 XMM22, 752 XMM23, 753 XMM24, 754 XMM25, 755 XMM26, 756 XMM27, 757 XMM28, 758 XMM29, 759 XMM30, 760 XMM31); 761 762 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 763 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 764 765 // Class for pre evex double registers 766 reg_class double_reg_legacy(XMM0, XMM0b, 767 XMM1, XMM1b, 768 XMM2, XMM2b, 769 XMM3, XMM3b, 770 XMM4, XMM4b, 771 XMM5, XMM5b, 772 XMM6, XMM6b, 773 XMM7, XMM7b, 774 XMM8, XMM8b, 775 XMM9, XMM9b, 776 XMM10, XMM10b, 777 XMM11, XMM11b, 778 XMM12, XMM12b, 779 XMM13, XMM13b, 780 XMM14, XMM14b, 781 XMM15, XMM15b); 782 783 // Class for evex double registers 784 reg_class double_reg_evex(XMM0, XMM0b, 785 XMM1, XMM1b, 786 XMM2, XMM2b, 787 XMM3, XMM3b, 788 XMM4, XMM4b, 789 XMM5, XMM5b, 790 XMM6, XMM6b, 791 XMM7, XMM7b, 792 XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b, 800 XMM16, XMM16b, 801 XMM17, XMM17b, 802 XMM18, XMM18b, 803 XMM19, XMM19b, 804 XMM20, XMM20b, 805 XMM21, XMM21b, 806 XMM22, XMM22b, 807 XMM23, XMM23b, 808 XMM24, XMM24b, 809 XMM25, XMM25b, 810 XMM26, XMM26b, 811 XMM27, XMM27b, 812 XMM28, XMM28b, 813 XMM29, XMM29b, 814 XMM30, XMM30b, 815 XMM31, XMM31b); 816 817 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 818 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 819 820 // Class for pre evex 32bit vector registers 821 reg_class vectors_reg_legacy(XMM0, 822 XMM1, 823 XMM2, 824 XMM3, 825 XMM4, 826 XMM5, 827 XMM6, 828 XMM7, 829 XMM8, 830 XMM9, 831 XMM10, 832 XMM11, 833 XMM12, 834 XMM13, 835 XMM14, 836 XMM15); 837 838 // Class for evex 32bit vector registers 839 reg_class vectors_reg_evex(XMM0, 840 XMM1, 841 XMM2, 842 XMM3, 843 XMM4, 844 XMM5, 845 XMM6, 846 XMM7, 847 XMM8, 848 XMM9, 849 XMM10, 850 XMM11, 851 XMM12, 852 XMM13, 853 XMM14, 854 XMM15, 855 XMM16, 856 XMM17, 857 XMM18, 858 XMM19, 859 XMM20, 860 XMM21, 861 XMM22, 862 XMM23, 863 XMM24, 864 XMM25, 865 XMM26, 866 XMM27, 867 XMM28, 868 XMM29, 869 XMM30, 870 XMM31); 871 872 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 873 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 874 875 // Class for all 64bit vector registers 876 reg_class vectord_reg_legacy(XMM0, XMM0b, 877 XMM1, XMM1b, 878 XMM2, XMM2b, 879 XMM3, XMM3b, 880 XMM4, XMM4b, 881 XMM5, XMM5b, 882 XMM6, XMM6b, 883 XMM7, XMM7b, 884 XMM8, XMM8b, 885 XMM9, XMM9b, 886 XMM10, XMM10b, 887 XMM11, XMM11b, 888 XMM12, XMM12b, 889 XMM13, XMM13b, 890 XMM14, XMM14b, 891 XMM15, XMM15b); 
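
// Note: each vector width in this section is described twice: a legacy class
// restricted to XMM0-XMM15 and an EVEX class that also covers XMM16-XMM31.
// The matching reg_class_dynamic entries select the EVEX variant when their
// predicate (for example VM_Version::supports_evex()) holds at runtime, and
// otherwise fall back to the legacy variant.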
892 893 // Class for all 64bit vector registers 894 reg_class vectord_reg_evex(XMM0, XMM0b, 895 XMM1, XMM1b, 896 XMM2, XMM2b, 897 XMM3, XMM3b, 898 XMM4, XMM4b, 899 XMM5, XMM5b, 900 XMM6, XMM6b, 901 XMM7, XMM7b, 902 XMM8, XMM8b, 903 XMM9, XMM9b, 904 XMM10, XMM10b, 905 XMM11, XMM11b, 906 XMM12, XMM12b, 907 XMM13, XMM13b, 908 XMM14, XMM14b, 909 XMM15, XMM15b, 910 XMM16, XMM16b, 911 XMM17, XMM17b, 912 XMM18, XMM18b, 913 XMM19, XMM19b, 914 XMM20, XMM20b, 915 XMM21, XMM21b, 916 XMM22, XMM22b, 917 XMM23, XMM23b, 918 XMM24, XMM24b, 919 XMM25, XMM25b, 920 XMM26, XMM26b, 921 XMM27, XMM27b, 922 XMM28, XMM28b, 923 XMM29, XMM29b, 924 XMM30, XMM30b, 925 XMM31, XMM31b); 926 927 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 928 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 929 930 // Class for all 128bit vector registers 931 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 932 XMM1, XMM1b, XMM1c, XMM1d, 933 XMM2, XMM2b, XMM2c, XMM2d, 934 XMM3, XMM3b, XMM3c, XMM3d, 935 XMM4, XMM4b, XMM4c, XMM4d, 936 XMM5, XMM5b, XMM5c, XMM5d, 937 XMM6, XMM6b, XMM6c, XMM6d, 938 XMM7, XMM7b, XMM7c, XMM7d, 939 XMM8, XMM8b, XMM8c, XMM8d, 940 XMM9, XMM9b, XMM9c, XMM9d, 941 XMM10, XMM10b, XMM10c, XMM10d, 942 XMM11, XMM11b, XMM11c, XMM11d, 943 XMM12, XMM12b, XMM12c, XMM12d, 944 XMM13, XMM13b, XMM13c, XMM13d, 945 XMM14, XMM14b, XMM14c, XMM14d, 946 XMM15, XMM15b, XMM15c, XMM15d); 947 948 // Class for all 128bit vector registers 949 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 950 XMM1, XMM1b, XMM1c, XMM1d, 951 XMM2, XMM2b, XMM2c, XMM2d, 952 XMM3, XMM3b, XMM3c, XMM3d, 953 XMM4, XMM4b, XMM4c, XMM4d, 954 XMM5, XMM5b, XMM5c, XMM5d, 955 XMM6, XMM6b, XMM6c, XMM6d, 956 XMM7, XMM7b, XMM7c, XMM7d, 957 XMM8, XMM8b, XMM8c, XMM8d, 958 XMM9, XMM9b, XMM9c, XMM9d, 959 XMM10, XMM10b, XMM10c, XMM10d, 960 XMM11, XMM11b, XMM11c, XMM11d, 961 XMM12, XMM12b, XMM12c, XMM12d, 962 XMM13, XMM13b, XMM13c, XMM13d, 963 XMM14, XMM14b, XMM14c, XMM14d, 964 XMM15, XMM15b, XMM15c, XMM15d, 965 XMM16, XMM16b, XMM16c, XMM16d, 966 XMM17, XMM17b, XMM17c, XMM17d, 967 XMM18, XMM18b, XMM18c, XMM18d, 968 XMM19, XMM19b, XMM19c, XMM19d, 969 XMM20, XMM20b, XMM20c, XMM20d, 970 XMM21, XMM21b, XMM21c, XMM21d, 971 XMM22, XMM22b, XMM22c, XMM22d, 972 XMM23, XMM23b, XMM23c, XMM23d, 973 XMM24, XMM24b, XMM24c, XMM24d, 974 XMM25, XMM25b, XMM25c, XMM25d, 975 XMM26, XMM26b, XMM26c, XMM26d, 976 XMM27, XMM27b, XMM27c, XMM27d, 977 XMM28, XMM28b, XMM28c, XMM28d, 978 XMM29, XMM29b, XMM29c, XMM29d, 979 XMM30, XMM30b, XMM30c, XMM30d, 980 XMM31, XMM31b, XMM31c, XMM31d); 981 982 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 983 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 984 985 // Class for all 256bit vector registers 986 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 987 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 988 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 989 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 990 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 991 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 992 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 993 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 994 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 995 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 996 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 997 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 998 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 999 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1000 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1001 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h); 1002 1003 // Class for all 256bit vector registers 1004 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1005 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1006 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1007 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1008 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1009 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1010 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1011 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, 1012 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1013 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1014 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1015 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1016 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1017 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1018 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1019 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1020 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1021 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1022 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1023 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1024 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1025 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1026 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1027 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1028 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1029 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1030 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1031 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1032 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1033 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1034 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1035 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h); 1036 1037 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1038 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1039 1040 // Class for all 512bit vector registers 1041 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1042 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1043 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1044 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1045 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1046 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, 
XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1047 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1048 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1049 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1050 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1051 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1052 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1053 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1054 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1055 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1056 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p, 1057 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1058 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1059 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1060 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1061 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1062 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1063 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1064 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1065 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1066 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1067 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1068 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1069 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1070 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1071 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1072 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p); 1073 1074 // Class for restricted 512bit vector registers 1075 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, 
XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1076 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1077 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1078 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1079 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1080 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1081 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1082 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p, 1083 XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1084 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1085 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1086 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1087 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1088 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1089 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1090 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p); 1091 1092 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1093 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1094 1095 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1096 %} 1097 1098 1099 //----------SOURCE BLOCK------------------------------------------------------- 1100 // This is a block of C++ code which provides values, functions, and 1101 // definitions necessary in the rest of the architecture description 1102 1103 source_hpp %{ 1104 // Header information of the source block. 1105 // Method declarations/definitions which are used outside 1106 // the ad-scope can conveniently be defined here. 1107 // 1108 // To keep related declarations/definitions/uses close together, 1109 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 1110 1111 #include "runtime/vm_version.hpp" 1112 1113 class NativeJump; 1114 1115 class CallStubImpl { 1116 1117 //-------------------------------------------------------------- 1118 //---< Used for optimization in Compile::shorten_branches >--- 1119 //-------------------------------------------------------------- 1120 1121 public: 1122 // Size of call trampoline stub. 
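// No trampoline stubs are needed on x86 (direct calls can reach the entire code cache), so both helpers below simply return 0.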
1123 static uint size_call_trampoline() { 1124 return 0; // no call trampolines on this platform 1125 } 1126 1127 // number of relocations needed by a call trampoline stub 1128 static uint reloc_call_trampoline() { 1129 return 0; // no call trampolines on this platform 1130 } 1131 }; 1132 1133 class HandlerImpl { 1134 1135 public: 1136 1137 static int emit_exception_handler(C2_MacroAssembler *masm); 1138 static int emit_deopt_handler(C2_MacroAssembler* masm); 1139 1140 static uint size_exception_handler() { 1141 // NativeCall instruction size is the same as NativeJump. 1142 // exception handler starts out as jump and can be patched to 1143 // a call by deoptimization. (4932387) 1144 // Note that this value is also credited (in output.cpp) to 1145 // the size of the code section. 1146 return NativeJump::instruction_size; 1147 } 1148 1149 static uint size_deopt_handler() { 1150 // three 5 byte instructions plus one move for unreachable address. 1151 return 15+3; 1152 } 1153 }; 1154 1155 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1156 switch(bytes) { 1157 case 4: // fall-through 1158 case 8: // fall-through 1159 case 16: return Assembler::AVX_128bit; 1160 case 32: return Assembler::AVX_256bit; 1161 case 64: return Assembler::AVX_512bit; 1162 1163 default: { 1164 ShouldNotReachHere(); 1165 return Assembler::AVX_NoVec; 1166 } 1167 } 1168 } 1169 1170 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1171 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1172 } 1173 1174 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1175 uint def_idx = use->operand_index(opnd); 1176 Node* def = use->in(def_idx); 1177 return vector_length_encoding(def); 1178 } 1179 1180 static inline bool is_vector_popcount_predicate(BasicType bt) { 1181 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1182 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1183 } 1184 1185 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1186 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1187 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1188 } 1189 1190 class Node::PD { 1191 public: 1192 enum NodeFlags { 1193 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1194 Flag_sets_carry_flag = Node::_last_flag << 2, 1195 Flag_sets_parity_flag = Node::_last_flag << 3, 1196 Flag_sets_zero_flag = Node::_last_flag << 4, 1197 Flag_sets_overflow_flag = Node::_last_flag << 5, 1198 Flag_sets_sign_flag = Node::_last_flag << 6, 1199 Flag_clears_carry_flag = Node::_last_flag << 7, 1200 Flag_clears_parity_flag = Node::_last_flag << 8, 1201 Flag_clears_zero_flag = Node::_last_flag << 9, 1202 Flag_clears_overflow_flag = Node::_last_flag << 10, 1203 Flag_clears_sign_flag = Node::_last_flag << 11, 1204 _last_flag = Flag_clears_sign_flag 1205 }; 1206 }; 1207 1208 %} // end source_hpp 1209 1210 source %{ 1211 1212 #include "opto/addnode.hpp" 1213 #include "c2_intelJccErratum_x86.hpp" 1214 1215 void PhaseOutput::pd_perform_mach_node_analysis() { 1216 if (VM_Version::has_intel_jcc_erratum()) { 1217 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1218 _buf_sizes._code += extra_padding; 1219 } 1220 } 1221 1222 int MachNode::pd_alignment_required() const { 1223 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1224 // Conservatively add worst case
padding. We assume that relocInfo::addr_unit() is 1 on x86. 1225 return IntelJccErratum::largest_jcc_size() + 1; 1226 } else { 1227 return 1; 1228 } 1229 } 1230 1231 int MachNode::compute_padding(int current_offset) const { 1232 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1233 Compile* C = Compile::current(); 1234 PhaseOutput* output = C->output(); 1235 Block* block = output->block(); 1236 int index = output->index(); 1237 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1238 } else { 1239 return 0; 1240 } 1241 } 1242 1243 // Emit exception handler code. 1244 // Stuff framesize into a register and call a VM stub routine. 1245 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1246 1247 // Note that the code buffer's insts_mark is always relative to insts. 1248 // That's why we must use the macroassembler to generate a handler. 1249 address base = __ start_a_stub(size_exception_handler()); 1250 if (base == nullptr) { 1251 ciEnv::current()->record_failure("CodeCache is full"); 1252 return 0; // CodeBuffer::expand failed 1253 } 1254 int offset = __ offset(); 1255 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1256 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1257 __ end_a_stub(); 1258 return offset; 1259 } 1260 1261 // Emit deopt handler code. 1262 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1263 1264 // Note that the code buffer's insts_mark is always relative to insts. 1265 // That's why we must use the macroassembler to generate a handler. 1266 address base = __ start_a_stub(size_deopt_handler()); 1267 if (base == nullptr) { 1268 ciEnv::current()->record_failure("CodeCache is full"); 1269 return 0; // CodeBuffer::expand failed 1270 } 1271 int offset = __ offset(); 1272 1273 address the_pc = (address) __ pc(); 1274 Label next; 1275 // push a "the_pc" on the stack without destroying any registers 1276 // as they all may be live. 1277 1278 // push address of "next" 1279 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1280 __ bind(next); 1281 // adjust it so it matches "the_pc" 1282 __ subptr(Address(rsp, 0), __ offset() - offset); 1283 1284 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1285 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1286 __ end_a_stub(); 1287 return offset; 1288 } 1289 1290 static Assembler::Width widthForType(BasicType bt) { 1291 if (bt == T_BYTE) { 1292 return Assembler::B; 1293 } else if (bt == T_SHORT) { 1294 return Assembler::W; 1295 } else if (bt == T_INT) { 1296 return Assembler::D; 1297 } else { 1298 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1299 return Assembler::Q; 1300 } 1301 } 1302 1303 //============================================================================= 1304 1305 // Float masks come from different places depending on platform. 
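// Each helper below simply forwards to the corresponding StubRoutines::x86 constant, keeping the platform-specific lookup in one place; e.g. float_signmask()/float_signflip() provide the sign-bit masks used for abs/neg style sign manipulation.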
1306 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1307 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1308 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1309 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1310 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1311 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1312 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1313 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1314 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1315 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1316 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1317 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1318 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1319 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1320 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1321 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1322 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1323 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1324 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1325 1326 //============================================================================= 1327 bool Matcher::match_rule_supported(int opcode) { 1328 if (!has_match_rule(opcode)) { 1329 return false; // no match rule present 1330 } 1331 switch (opcode) { 1332 case Op_AbsVL: 1333 case Op_StoreVectorScatter: 1334 if (UseAVX < 3) { 1335 return false; 1336 } 1337 break; 1338 case Op_PopCountI: 1339 case Op_PopCountL: 1340 if (!UsePopCountInstruction) { 1341 return false; 1342 } 1343 break; 1344 case Op_PopCountVI: 1345 if (UseAVX < 2) { 1346 return false; 1347 } 1348 break; 1349 case Op_CompressV: 1350 case Op_ExpandV: 1351 case Op_PopCountVL: 1352 if (UseAVX < 2) { 1353 return false; 1354 } 1355 break; 1356 case Op_MulVI: 1357 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1358 return false; 1359 } 1360 break; 1361 case Op_MulVL: 1362 if (UseSSE < 4) { // only with SSE4_1 or AVX 1363 return false; 1364 } 1365 break; 1366 case Op_MulReductionVL: 1367 if (VM_Version::supports_avx512dq() == false) { 1368 return false; 1369 } 1370 break; 1371 case Op_AddReductionVL: 1372 if (UseSSE < 2) { // requires at least SSE2 1373 return false; 1374 } 1375 break; 1376 case Op_AbsVB: 1377 case Op_AbsVS: 1378 case Op_AbsVI: 1379 case Op_AddReductionVI: 1380 case Op_AndReductionV: 1381 case Op_OrReductionV: 1382 case Op_XorReductionV: 1383 if (UseSSE < 3) { // requires at least SSSE3 1384 return false; 1385 } 1386 break; 1387 case Op_MaxHF: 1388 case Op_MinHF: 1389 if (!VM_Version::supports_avx512vlbw()) { 1390 return false; 1391 } // fallthrough 1392 case Op_AddHF: 1393 case Op_DivHF: 1394 case Op_FmaHF: 1395 case Op_MulHF: 1396 case Op_ReinterpretS2HF: 1397 case 
Op_ReinterpretHF2S: 1398 case Op_SubHF: 1399 case Op_SqrtHF: 1400 if (!VM_Version::supports_avx512_fp16()) { 1401 return false; 1402 } 1403 break; 1404 case Op_VectorLoadShuffle: 1405 case Op_VectorRearrange: 1406 case Op_MulReductionVI: 1407 if (UseSSE < 4) { // requires at least SSE4 1408 return false; 1409 } 1410 break; 1411 case Op_IsInfiniteF: 1412 case Op_IsInfiniteD: 1413 if (!VM_Version::supports_avx512dq()) { 1414 return false; 1415 } 1416 break; 1417 case Op_SqrtVD: 1418 case Op_SqrtVF: 1419 case Op_VectorMaskCmp: 1420 case Op_VectorCastB2X: 1421 case Op_VectorCastS2X: 1422 case Op_VectorCastI2X: 1423 case Op_VectorCastL2X: 1424 case Op_VectorCastF2X: 1425 case Op_VectorCastD2X: 1426 case Op_VectorUCastB2X: 1427 case Op_VectorUCastS2X: 1428 case Op_VectorUCastI2X: 1429 case Op_VectorMaskCast: 1430 if (UseAVX < 1) { // enabled for AVX only 1431 return false; 1432 } 1433 break; 1434 case Op_PopulateIndex: 1435 if (UseAVX < 2) { 1436 return false; 1437 } 1438 break; 1439 case Op_RoundVF: 1440 if (UseAVX < 2) { // enabled for AVX2 only 1441 return false; 1442 } 1443 break; 1444 case Op_RoundVD: 1445 if (UseAVX < 3) { 1446 return false; // enabled for AVX3 only 1447 } 1448 break; 1449 case Op_CompareAndSwapL: 1450 case Op_CompareAndSwapP: 1451 break; 1452 case Op_StrIndexOf: 1453 if (!UseSSE42Intrinsics) { 1454 return false; 1455 } 1456 break; 1457 case Op_StrIndexOfChar: 1458 if (!UseSSE42Intrinsics) { 1459 return false; 1460 } 1461 break; 1462 case Op_OnSpinWait: 1463 if (VM_Version::supports_on_spin_wait() == false) { 1464 return false; 1465 } 1466 break; 1467 case Op_MulVB: 1468 case Op_LShiftVB: 1469 case Op_RShiftVB: 1470 case Op_URShiftVB: 1471 case Op_VectorInsert: 1472 case Op_VectorLoadMask: 1473 case Op_VectorStoreMask: 1474 case Op_VectorBlend: 1475 if (UseSSE < 4) { 1476 return false; 1477 } 1478 break; 1479 case Op_MaxD: 1480 case Op_MaxF: 1481 case Op_MinD: 1482 case Op_MinF: 1483 if (UseAVX < 1) { // enabled for AVX only 1484 return false; 1485 } 1486 break; 1487 case Op_CacheWB: 1488 case Op_CacheWBPreSync: 1489 case Op_CacheWBPostSync: 1490 if (!VM_Version::supports_data_cache_line_flush()) { 1491 return false; 1492 } 1493 break; 1494 case Op_ExtractB: 1495 case Op_ExtractL: 1496 case Op_ExtractI: 1497 case Op_RoundDoubleMode: 1498 if (UseSSE < 4) { 1499 return false; 1500 } 1501 break; 1502 case Op_RoundDoubleModeV: 1503 if (VM_Version::supports_avx() == false) { 1504 return false; // 128bit vroundpd is not available 1505 } 1506 break; 1507 case Op_LoadVectorGather: 1508 case Op_LoadVectorGatherMasked: 1509 if (UseAVX < 2) { 1510 return false; 1511 } 1512 break; 1513 case Op_FmaF: 1514 case Op_FmaD: 1515 case Op_FmaVD: 1516 case Op_FmaVF: 1517 if (!UseFMA) { 1518 return false; 1519 } 1520 break; 1521 case Op_MacroLogicV: 1522 if (UseAVX < 3 || !UseVectorMacroLogic) { 1523 return false; 1524 } 1525 break; 1526 1527 case Op_VectorCmpMasked: 1528 case Op_VectorMaskGen: 1529 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1530 return false; 1531 } 1532 break; 1533 case Op_VectorMaskFirstTrue: 1534 case Op_VectorMaskLastTrue: 1535 case Op_VectorMaskTrueCount: 1536 case Op_VectorMaskToLong: 1537 if (UseAVX < 1) { 1538 return false; 1539 } 1540 break; 1541 case Op_RoundF: 1542 case Op_RoundD: 1543 break; 1544 case Op_CopySignD: 1545 case Op_CopySignF: 1546 if (UseAVX < 3) { 1547 return false; 1548 } 1549 if (!VM_Version::supports_avx512vl()) { 1550 return false; 1551 } 1552 break; 1553 case Op_CompressBits: 1554 case Op_ExpandBits: 1555 if (!VM_Version::supports_bmi2()) 
{ 1556 return false; 1557 } 1558 break; 1559 case Op_SignumF: 1560 if (UseSSE < 1) { 1561 return false; 1562 } 1563 break; 1564 case Op_SignumD: 1565 if (UseSSE < 2) { 1566 return false; 1567 } 1568 break; 1569 case Op_CompressM: 1570 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1571 return false; 1572 } 1573 break; 1574 case Op_SqrtF: 1575 if (UseSSE < 1) { 1576 return false; 1577 } 1578 break; 1579 case Op_SqrtD: 1580 if (UseSSE < 2) { 1581 return false; 1582 } 1583 break; 1584 case Op_ConvF2HF: 1585 case Op_ConvHF2F: 1586 if (!VM_Version::supports_float16()) { 1587 return false; 1588 } 1589 break; 1590 case Op_VectorCastF2HF: 1591 case Op_VectorCastHF2F: 1592 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1593 return false; 1594 } 1595 break; 1596 } 1597 return true; // Match rules are supported by default. 1598 } 1599 1600 //------------------------------------------------------------------------ 1601 1602 static inline bool is_pop_count_instr_target(BasicType bt) { 1603 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1604 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1605 } 1606 1607 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1608 return match_rule_supported_vector(opcode, vlen, bt); 1609 } 1610 1611 // Identify extra cases that we might want to provide match rules for vector nodes and 1612 // other intrinsics guarded with vector length (vlen) and element type (bt). 1613 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1614 if (!match_rule_supported(opcode)) { 1615 return false; 1616 } 1617 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1618 // * SSE2 supports 128bit vectors for all types; 1619 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1620 // * AVX2 supports 256bit vectors for all types; 1621 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1622 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1623 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1624 // And MaxVectorSize is taken into account as well. 
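// For example, with UseAVX == 2 vector_width_in_bytes() tops out at 32 bytes, so a request for a 64-byte (512-bit) vector is rejected by the check below.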
1625 if (!vector_size_supported(bt, vlen)) { 1626 return false; 1627 } 1628 // Special cases which require vector length follow: 1629 // * implementation limitations 1630 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1631 // * 128bit vroundpd instruction is present only in AVX1 1632 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1633 switch (opcode) { 1634 case Op_AbsVF: 1635 case Op_NegVF: 1636 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1637 return false; // 512bit vandps and vxorps are not available 1638 } 1639 break; 1640 case Op_AbsVD: 1641 case Op_NegVD: 1642 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1643 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1644 } 1645 break; 1646 case Op_RotateRightV: 1647 case Op_RotateLeftV: 1648 if (bt != T_INT && bt != T_LONG) { 1649 return false; 1650 } // fallthrough 1651 case Op_MacroLogicV: 1652 if (!VM_Version::supports_evex() || 1653 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1654 return false; 1655 } 1656 break; 1657 case Op_ClearArray: 1658 case Op_VectorMaskGen: 1659 case Op_VectorCmpMasked: 1660 if (!VM_Version::supports_avx512bw()) { 1661 return false; 1662 } 1663 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1664 return false; 1665 } 1666 break; 1667 case Op_LoadVectorMasked: 1668 case Op_StoreVectorMasked: 1669 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1670 return false; 1671 } 1672 break; 1673 case Op_UMinV: 1674 case Op_UMaxV: 1675 if (UseAVX == 0) { 1676 return false; 1677 } 1678 break; 1679 case Op_MaxV: 1680 case Op_MinV: 1681 if (UseSSE < 4 && is_integral_type(bt)) { 1682 return false; 1683 } 1684 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1685 // Float/Double intrinsics are enabled for AVX family currently. 1686 if (UseAVX == 0) { 1687 return false; 1688 } 1689 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1690 return false; 1691 } 1692 } 1693 break; 1694 case Op_CallLeafVector: 1695 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1696 return false; 1697 } 1698 break; 1699 case Op_AddReductionVI: 1700 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1701 return false; 1702 } 1703 // fallthrough 1704 case Op_AndReductionV: 1705 case Op_OrReductionV: 1706 case Op_XorReductionV: 1707 if (is_subword_type(bt) && (UseSSE < 4)) { 1708 return false; 1709 } 1710 break; 1711 case Op_MinReductionV: 1712 case Op_MaxReductionV: 1713 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1714 return false; 1715 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1716 return false; 1717 } 1718 // Float/Double intrinsics enabled for AVX family. 
1719 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1720 return false; 1721 } 1722 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1723 return false; 1724 } 1725 break; 1726 case Op_VectorTest: 1727 if (UseSSE < 4) { 1728 return false; // Implementation limitation 1729 } else if (size_in_bits < 32) { 1730 return false; // Implementation limitation 1731 } 1732 break; 1733 case Op_VectorLoadShuffle: 1734 case Op_VectorRearrange: 1735 if(vlen == 2) { 1736 return false; // Implementation limitation due to how shuffle is loaded 1737 } else if (size_in_bits == 256 && UseAVX < 2) { 1738 return false; // Implementation limitation 1739 } 1740 break; 1741 case Op_VectorLoadMask: 1742 case Op_VectorMaskCast: 1743 if (size_in_bits == 256 && UseAVX < 2) { 1744 return false; // Implementation limitation 1745 } 1746 // fallthrough 1747 case Op_VectorStoreMask: 1748 if (vlen == 2) { 1749 return false; // Implementation limitation 1750 } 1751 break; 1752 case Op_PopulateIndex: 1753 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1754 return false; 1755 } 1756 break; 1757 case Op_VectorCastB2X: 1758 case Op_VectorCastS2X: 1759 case Op_VectorCastI2X: 1760 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1761 return false; 1762 } 1763 break; 1764 case Op_VectorCastL2X: 1765 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1766 return false; 1767 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1768 return false; 1769 } 1770 break; 1771 case Op_VectorCastF2X: { 1772 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1773 // happen after intermediate conversion to integer and special handling 1774 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
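// Note that the width checked below is that of the float source vector, e.g. vlen == 8 yields a 256-bit source regardless of the destination element type.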
1775 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1776 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1777 return false; 1778 } 1779 } 1780 // fallthrough 1781 case Op_VectorCastD2X: 1782 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1783 return false; 1784 } 1785 break; 1786 case Op_VectorCastF2HF: 1787 case Op_VectorCastHF2F: 1788 if (!VM_Version::supports_f16c() && 1789 ((!VM_Version::supports_evex() || 1790 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1791 return false; 1792 } 1793 break; 1794 case Op_RoundVD: 1795 if (!VM_Version::supports_avx512dq()) { 1796 return false; 1797 } 1798 break; 1799 case Op_MulReductionVI: 1800 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1801 return false; 1802 } 1803 break; 1804 case Op_LoadVectorGatherMasked: 1805 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1806 return false; 1807 } 1808 if (is_subword_type(bt) && 1809 ((size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1810 (size_in_bits < 64) || 1811 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1812 return false; 1813 } 1814 break; 1815 case Op_StoreVectorScatterMasked: 1816 case Op_StoreVectorScatter: 1817 if (is_subword_type(bt)) { 1818 return false; 1819 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1820 return false; 1821 } 1822 // fallthrough 1823 case Op_LoadVectorGather: 1824 if (!is_subword_type(bt) && size_in_bits == 64) { 1825 return false; 1826 } 1827 if (is_subword_type(bt) && size_in_bits < 64) { 1828 return false; 1829 } 1830 break; 1831 case Op_SaturatingAddV: 1832 case Op_SaturatingSubV: 1833 if (UseAVX < 1) { 1834 return false; // Implementation limitation 1835 } 1836 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1837 return false; 1838 } 1839 break; 1840 case Op_SelectFromTwoVector: 1841 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1842 return false; 1843 } 1844 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1845 return false; 1846 } 1847 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1848 return false; 1849 } 1850 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1851 return false; 1852 } 1853 break; 1854 case Op_MaskAll: 1855 if (!VM_Version::supports_evex()) { 1856 return false; 1857 } 1858 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1859 return false; 1860 } 1861 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1862 return false; 1863 } 1864 break; 1865 case Op_VectorMaskCmp: 1866 if (vlen < 2 || size_in_bits < 32) { 1867 return false; 1868 } 1869 break; 1870 case Op_CompressM: 1871 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1872 return false; 1873 } 1874 break; 1875 case Op_CompressV: 1876 case Op_ExpandV: 1877 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1878 return false; 1879 } 1880 if (size_in_bits < 128 ) { 1881 return false; 1882 } 1883 case Op_VectorLongToMask: 1884 if (UseAVX < 1) { 1885 return false; 1886 } 1887 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1888 return false; 1889 } 1890 break; 1891 case Op_SignumVD: 1892 case Op_SignumVF: 1893 if (UseAVX < 1) { 1894 return false; 1895 } 1896 break; 1897 case Op_PopCountVI: 1898 case Op_PopCountVL: { 1899 if (!is_pop_count_instr_target(bt) && 1900 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1901 
return false; 1902 } 1903 } 1904 break; 1905 case Op_ReverseV: 1906 case Op_ReverseBytesV: 1907 if (UseAVX < 2) { 1908 return false; 1909 } 1910 break; 1911 case Op_CountTrailingZerosV: 1912 case Op_CountLeadingZerosV: 1913 if (UseAVX < 2) { 1914 return false; 1915 } 1916 break; 1917 } 1918 return true; // Per default match rules are supported. 1919 } 1920 1921 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1922 // ADLC based match_rule_supported routine checks for the existence of pattern based 1923 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1924 // of their non-masked counterpart with mask edge being the differentiator. 1925 // This routine does a strict check on the existence of masked operation patterns 1926 // by returning a default false value for all the other opcodes apart from the 1927 // ones whose masked instruction patterns are defined in this file. 1928 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1929 return false; 1930 } 1931 1932 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1933 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1934 return false; 1935 } 1936 switch(opcode) { 1937 // Unary masked operations 1938 case Op_AbsVB: 1939 case Op_AbsVS: 1940 if(!VM_Version::supports_avx512bw()) { 1941 return false; // Implementation limitation 1942 } 1943 case Op_AbsVI: 1944 case Op_AbsVL: 1945 return true; 1946 1947 // Ternary masked operations 1948 case Op_FmaVF: 1949 case Op_FmaVD: 1950 return true; 1951 1952 case Op_MacroLogicV: 1953 if(bt != T_INT && bt != T_LONG) { 1954 return false; 1955 } 1956 return true; 1957 1958 // Binary masked operations 1959 case Op_AddVB: 1960 case Op_AddVS: 1961 case Op_SubVB: 1962 case Op_SubVS: 1963 case Op_MulVS: 1964 case Op_LShiftVS: 1965 case Op_RShiftVS: 1966 case Op_URShiftVS: 1967 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1968 if (!VM_Version::supports_avx512bw()) { 1969 return false; // Implementation limitation 1970 } 1971 return true; 1972 1973 case Op_MulVL: 1974 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1975 if (!VM_Version::supports_avx512dq()) { 1976 return false; // Implementation limitation 1977 } 1978 return true; 1979 1980 case Op_AndV: 1981 case Op_OrV: 1982 case Op_XorV: 1983 case Op_RotateRightV: 1984 case Op_RotateLeftV: 1985 if (bt != T_INT && bt != T_LONG) { 1986 return false; // Implementation limitation 1987 } 1988 return true; 1989 1990 case Op_VectorLoadMask: 1991 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1992 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1993 return false; 1994 } 1995 return true; 1996 1997 case Op_AddVI: 1998 case Op_AddVL: 1999 case Op_AddVF: 2000 case Op_AddVD: 2001 case Op_SubVI: 2002 case Op_SubVL: 2003 case Op_SubVF: 2004 case Op_SubVD: 2005 case Op_MulVI: 2006 case Op_MulVF: 2007 case Op_MulVD: 2008 case Op_DivVF: 2009 case Op_DivVD: 2010 case Op_SqrtVF: 2011 case Op_SqrtVD: 2012 case Op_LShiftVI: 2013 case Op_LShiftVL: 2014 case Op_RShiftVI: 2015 case Op_RShiftVL: 2016 case Op_URShiftVI: 2017 case Op_URShiftVL: 2018 case Op_LoadVectorMasked: 2019 case Op_StoreVectorMasked: 2020 case Op_LoadVectorGatherMasked: 2021 case Op_StoreVectorScatterMasked: 2022 return true; 2023 2024 case Op_UMinV: 2025 case Op_UMaxV: 2026 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2027 return false; 2028 } // fallthrough 2029 case Op_MaxV: 2030 case Op_MinV: 2031 if (is_subword_type(bt) 
&& !VM_Version::supports_avx512bw()) { 2032 return false; // Implementation limitation 2033 } 2034 if (is_floating_point_type(bt)) { 2035 return false; // Implementation limitation 2036 } 2037 return true; 2038 case Op_SaturatingAddV: 2039 case Op_SaturatingSubV: 2040 if (!is_subword_type(bt)) { 2041 return false; 2042 } 2043 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2044 return false; // Implementation limitation 2045 } 2046 return true; 2047 2048 case Op_VectorMaskCmp: 2049 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2050 return false; // Implementation limitation 2051 } 2052 return true; 2053 2054 case Op_VectorRearrange: 2055 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2056 return false; // Implementation limitation 2057 } 2058 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2059 return false; // Implementation limitation 2060 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2061 return false; // Implementation limitation 2062 } 2063 return true; 2064 2065 // Binary Logical operations 2066 case Op_AndVMask: 2067 case Op_OrVMask: 2068 case Op_XorVMask: 2069 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2070 return false; // Implementation limitation 2071 } 2072 return true; 2073 2074 case Op_PopCountVI: 2075 case Op_PopCountVL: 2076 if (!is_pop_count_instr_target(bt)) { 2077 return false; 2078 } 2079 return true; 2080 2081 case Op_MaskAll: 2082 return true; 2083 2084 case Op_CountLeadingZerosV: 2085 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2086 return true; 2087 } 2088 default: 2089 return false; 2090 } 2091 } 2092 2093 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2094 return false; 2095 } 2096 2097 // Return true if Vector::rearrange needs preparation of the shuffle argument 2098 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2099 switch (elem_bt) { 2100 case T_BYTE: return false; 2101 case T_SHORT: return !VM_Version::supports_avx512bw(); 2102 case T_INT: return !VM_Version::supports_avx(); 2103 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2104 default: 2105 ShouldNotReachHere(); 2106 return false; 2107 } 2108 } 2109 2110 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2111 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2112 bool legacy = (generic_opnd->opcode() == LEGVEC); 2113 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2114 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2115 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
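// (i.e. such temps are allocated from legVecZ, which is restricted to zmm0-zmm15, instead of the full EVEX register file.)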
2116 return new legVecZOper(); 2117 } 2118 if (legacy) { 2119 switch (ideal_reg) { 2120 case Op_VecS: return new legVecSOper(); 2121 case Op_VecD: return new legVecDOper(); 2122 case Op_VecX: return new legVecXOper(); 2123 case Op_VecY: return new legVecYOper(); 2124 case Op_VecZ: return new legVecZOper(); 2125 } 2126 } else { 2127 switch (ideal_reg) { 2128 case Op_VecS: return new vecSOper(); 2129 case Op_VecD: return new vecDOper(); 2130 case Op_VecX: return new vecXOper(); 2131 case Op_VecY: return new vecYOper(); 2132 case Op_VecZ: return new vecZOper(); 2133 } 2134 } 2135 ShouldNotReachHere(); 2136 return nullptr; 2137 } 2138 2139 bool Matcher::is_reg2reg_move(MachNode* m) { 2140 switch (m->rule()) { 2141 case MoveVec2Leg_rule: 2142 case MoveLeg2Vec_rule: 2143 case MoveF2VL_rule: 2144 case MoveF2LEG_rule: 2145 case MoveVL2F_rule: 2146 case MoveLEG2F_rule: 2147 case MoveD2VL_rule: 2148 case MoveD2LEG_rule: 2149 case MoveVL2D_rule: 2150 case MoveLEG2D_rule: 2151 return true; 2152 default: 2153 return false; 2154 } 2155 } 2156 2157 bool Matcher::is_generic_vector(MachOper* opnd) { 2158 switch (opnd->opcode()) { 2159 case VEC: 2160 case LEGVEC: 2161 return true; 2162 default: 2163 return false; 2164 } 2165 } 2166 2167 //------------------------------------------------------------------------ 2168 2169 const RegMask* Matcher::predicate_reg_mask(void) { 2170 return &_VECTMASK_REG_mask; 2171 } 2172 2173 // Max vector size in bytes. 0 if not supported. 2174 int Matcher::vector_width_in_bytes(BasicType bt) { 2175 assert(is_java_primitive(bt), "only primitive type vectors"); 2176 if (UseSSE < 2) return 0; 2177 // SSE2 supports 128bit vectors for all types. 2178 // AVX2 supports 256bit vectors for all types. 2179 // AVX2/EVEX supports 512bit vectors for all types. 2180 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2181 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2182 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2183 size = (UseAVX > 2) ? 64 : 32; 2184 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2185 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2186 // Use flag to limit vector size. 2187 size = MIN2(size,(int)MaxVectorSize); 2188 // Minimum 2 values in vector (or 4 for bytes). 2189 switch (bt) { 2190 case T_DOUBLE: 2191 case T_LONG: 2192 if (size < 16) return 0; 2193 break; 2194 case T_FLOAT: 2195 case T_INT: 2196 if (size < 8) return 0; 2197 break; 2198 case T_BOOLEAN: 2199 if (size < 4) return 0; 2200 break; 2201 case T_CHAR: 2202 if (size < 4) return 0; 2203 break; 2204 case T_BYTE: 2205 if (size < 4) return 0; 2206 break; 2207 case T_SHORT: 2208 if (size < 4) return 0; 2209 break; 2210 default: 2211 ShouldNotReachHere(); 2212 } 2213 return size; 2214 } 2215 2216 // Limits on vector size (number of elements) loaded into vector. 2217 int Matcher::max_vector_size(const BasicType bt) { 2218 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2219 } 2220 int Matcher::min_vector_size(const BasicType bt) { 2221 int max_size = max_vector_size(bt); 2222 // Min size which can be loaded into vector is 4 bytes. 2223 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2224 // Support for calling svml double64 vectors 2225 if (bt == T_DOUBLE) { 2226 size = 1; 2227 } 2228 return MIN2(size,max_size); 2229 } 2230 2231 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2232 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2233 // by default on Cascade Lake 2234 if (VM_Version::is_default_intel_cascade_lake()) { 2235 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2236 } 2237 return Matcher::max_vector_size(bt); 2238 } 2239 2240 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2241 return -1; 2242 } 2243 2244 // Vector ideal reg corresponding to specified size in bytes 2245 uint Matcher::vector_ideal_reg(int size) { 2246 assert(MaxVectorSize >= size, ""); 2247 switch(size) { 2248 case 4: return Op_VecS; 2249 case 8: return Op_VecD; 2250 case 16: return Op_VecX; 2251 case 32: return Op_VecY; 2252 case 64: return Op_VecZ; 2253 } 2254 ShouldNotReachHere(); 2255 return 0; 2256 } 2257 2258 // Check for shift by small constant as well 2259 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2260 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2261 shift->in(2)->get_int() <= 3 && 2262 // Are there other uses besides address expressions? 2263 !matcher->is_visited(shift)) { 2264 address_visited.set(shift->_idx); // Flag as address_visited 2265 mstack.push(shift->in(2), Matcher::Visit); 2266 Node *conv = shift->in(1); 2267 // Allow Matcher to match the rule which bypass 2268 // ConvI2L operation for an array index on LP64 2269 // if the index value is positive. 2270 if (conv->Opcode() == Op_ConvI2L && 2271 conv->as_Type()->type()->is_long()->_lo >= 0 && 2272 // Are there other uses besides address expressions? 2273 !matcher->is_visited(conv)) { 2274 address_visited.set(conv->_idx); // Flag as address_visited 2275 mstack.push(conv->in(1), Matcher::Pre_Visit); 2276 } else { 2277 mstack.push(conv, Matcher::Pre_Visit); 2278 } 2279 return true; 2280 } 2281 return false; 2282 } 2283 2284 // This function identifies sub-graphs in which a 'load' node is 2285 // input to two different nodes, and such that it can be matched 2286 // with BMI instructions like blsi, blsr, etc. 2287 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2288 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2289 // refers to the same node. 2290 // 2291 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2292 // This is a temporary solution until we make DAGs expressible in ADL. 
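// Typical use (see is_bmi_pattern() below): FusedPatternMatcher<TypeInt>(n, m, Op_ConI).match(Op_AndI, -1, Op_SubI, 1, 0) recognizes the blsi shape (AndI (SubI 0 LoadI*) LoadI*).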
2293 template<typename ConType> 2294 class FusedPatternMatcher { 2295 Node* _op1_node; 2296 Node* _mop_node; 2297 int _con_op; 2298 2299 static int match_next(Node* n, int next_op, int next_op_idx) { 2300 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2301 return -1; 2302 } 2303 2304 if (next_op_idx == -1) { // n is commutative, try rotations 2305 if (n->in(1)->Opcode() == next_op) { 2306 return 1; 2307 } else if (n->in(2)->Opcode() == next_op) { 2308 return 2; 2309 } 2310 } else { 2311 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2312 if (n->in(next_op_idx)->Opcode() == next_op) { 2313 return next_op_idx; 2314 } 2315 } 2316 return -1; 2317 } 2318 2319 public: 2320 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2321 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2322 2323 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2324 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2325 typename ConType::NativeType con_value) { 2326 if (_op1_node->Opcode() != op1) { 2327 return false; 2328 } 2329 if (_mop_node->outcnt() > 2) { 2330 return false; 2331 } 2332 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2333 if (op1_op2_idx == -1) { 2334 return false; 2335 } 2336 // Memory operation must be the other edge 2337 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2338 2339 // Check that the mop node is really what we want 2340 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2341 Node* op2_node = _op1_node->in(op1_op2_idx); 2342 if (op2_node->outcnt() > 1) { 2343 return false; 2344 } 2345 assert(op2_node->Opcode() == op2, "Should be"); 2346 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2347 if (op2_con_idx == -1) { 2348 return false; 2349 } 2350 // Memory operation must be the other edge 2351 int op2_mop_idx = (op2_con_idx & 1) + 1; 2352 // Check that the memory operation is the same node 2353 if (op2_node->in(op2_mop_idx) == _mop_node) { 2354 // Now check the constant 2355 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2356 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2357 return true; 2358 } 2359 } 2360 } 2361 return false; 2362 } 2363 }; 2364 2365 static bool is_bmi_pattern(Node* n, Node* m) { 2366 assert(UseBMI1Instructions, "sanity"); 2367 if (n != nullptr && m != nullptr) { 2368 if (m->Opcode() == Op_LoadI) { 2369 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2370 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2371 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2372 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2373 } else if (m->Opcode() == Op_LoadL) { 2374 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2375 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2376 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2377 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2378 } 2379 } 2380 return false; 2381 } 2382 2383 // Should the matcher clone input 'm' of node 'n'? 2384 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2385 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2386 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2387 mstack.push(m, Visit); 2388 return true; 2389 } 2390 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2391 mstack.push(m, Visit); // m = ShiftCntV 2392 return true; 2393 } 2394 if (is_encode_and_store_pattern(n, m)) { 2395 mstack.push(m, Visit); 2396 return true; 2397 } 2398 return false; 2399 } 2400 2401 // Should the Matcher clone shifts on addressing modes, expecting them 2402 // to be subsumed into complex addressing expressions or compute them 2403 // into registers? 2404 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2405 Node *off = m->in(AddPNode::Offset); 2406 if (off->is_Con()) { 2407 address_visited.test_set(m->_idx); // Flag as address_visited 2408 Node *adr = m->in(AddPNode::Address); 2409 2410 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2411 // AtomicAdd is not an addressing expression. 2412 // Cheap to find it by looking for screwy base. 2413 if (adr->is_AddP() && 2414 !adr->in(AddPNode::Base)->is_top() && 2415 !adr->in(AddPNode::Offset)->is_Con() && 2416 off->get_long() == (int) (off->get_long()) && // immL32 2417 // Are there other uses besides address expressions? 2418 !is_visited(adr)) { 2419 address_visited.set(adr->_idx); // Flag as address_visited 2420 Node *shift = adr->in(AddPNode::Offset); 2421 if (!clone_shift(shift, this, mstack, address_visited)) { 2422 mstack.push(shift, Pre_Visit); 2423 } 2424 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2425 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2426 } else { 2427 mstack.push(adr, Pre_Visit); 2428 } 2429 2430 // Clone X+offset as it also folds into most addressing expressions 2431 mstack.push(off, Visit); 2432 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2433 return true; 2434 } else if (clone_shift(off, this, mstack, address_visited)) { 2435 address_visited.test_set(m->_idx); // Flag as address_visited 2436 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2437 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2438 return true; 2439 } 2440 return false; 2441 } 2442 2443 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2444 switch (bt) { 2445 case BoolTest::eq: 2446 return Assembler::eq; 2447 case BoolTest::ne: 2448 return Assembler::neq; 2449 case BoolTest::le: 2450 case BoolTest::ule: 2451 return Assembler::le; 2452 case BoolTest::ge: 2453 case BoolTest::uge: 2454 return Assembler::nlt; 2455 case BoolTest::lt: 2456 case BoolTest::ult: 2457 return Assembler::lt; 2458 case BoolTest::gt: 2459 case BoolTest::ugt: 2460 return Assembler::nle; 2461 default : ShouldNotReachHere(); return Assembler::_false; 2462 } 2463 } 2464 2465 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2466 switch (bt) { 2467 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2468 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2469 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2470 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2471 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2472 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2473 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2474 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2475 } 2476 } 2477 2478 // Helper methods for MachSpillCopyNode::implementation(). 2479 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2480 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2481 assert(ireg == Op_VecS || // 32bit vector 2482 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2483 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2484 "no non-adjacent vector moves" ); 2485 if (masm) { 2486 switch (ireg) { 2487 case Op_VecS: // copy whole register 2488 case Op_VecD: 2489 case Op_VecX: 2490 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2491 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2492 } else { 2493 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2494 } 2495 break; 2496 case Op_VecY: 2497 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2498 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2499 } else { 2500 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2501 } 2502 break; 2503 case Op_VecZ: 2504 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2505 break; 2506 default: 2507 ShouldNotReachHere(); 2508 } 2509 #ifndef PRODUCT 2510 } else { 2511 switch (ireg) { 2512 case Op_VecS: 2513 case Op_VecD: 2514 case Op_VecX: 2515 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2516 break; 2517 case Op_VecY: 2518 case Op_VecZ: 2519 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2520 break; 2521 default: 2522 ShouldNotReachHere(); 2523 } 2524 #endif 2525 } 2526 } 2527 2528 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2529 int stack_offset, int reg, uint ireg, outputStream* st) { 2530 if (masm) { 2531 if (is_load) { 2532 switch (ireg) { 2533 case Op_VecS: 2534 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2535 break; 2536 case Op_VecD: 2537 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2538 break; 2539 case Op_VecX: 2540 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2541 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2542 } else { 2543 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2544 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2545 } 2546 break; 2547 case Op_VecY: 2548 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2549 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2550 } else { 2551 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2552 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2553 } 2554 break; 2555 case Op_VecZ: 2556 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2557 break; 2558 default: 2559 ShouldNotReachHere(); 2560 } 2561 } else { // store 2562 switch (ireg) { 2563 case Op_VecS: 2564 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2565 break; 2566 case Op_VecD: 2567 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2568 break; 2569 case Op_VecX: 2570 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2571 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2572 } 2573 else { 2574 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2575 } 2576 break; 2577 case Op_VecY: 2578 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2579 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2580 } 2581 else { 2582 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2583 } 2584 break; 2585 case Op_VecZ: 2586 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2587 break; 2588 default: 2589 ShouldNotReachHere(); 2590 } 2591 } 2592 #ifndef PRODUCT 2593 } else { 2594 if (is_load) { 2595 switch (ireg) { 2596 case Op_VecS: 2597 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2598 break; 2599 case Op_VecD: 2600 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2601 break; 2602 case Op_VecX: 2603 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2604 break; 2605 case Op_VecY: 2606 case Op_VecZ: 2607 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2608 break; 2609 default: 2610 ShouldNotReachHere(); 2611 } 2612 } else { // store 2613 switch (ireg) { 2614 case Op_VecS: 2615 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2616 break; 2617 case Op_VecD: 2618 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2619 break; 2620 case Op_VecX: 2621 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2622 break; 2623 case Op_VecY: 2624 case Op_VecZ: 2625 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2626 break; 2627 default: 2628 ShouldNotReachHere(); 2629 } 2630 } 2631 #endif 2632 } 2633 } 2634 2635 template <class T> 2636 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2637 int size = type2aelembytes(bt) * len; 2638 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2639 for (int i = 0; i < len; i++) { 2640 int offset = i * type2aelembytes(bt); 2641 switch (bt) { 2642 case T_BYTE: val->at(i) = con; break; 2643 case T_SHORT: { 2644 jshort c = con; 2645 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2646 break; 2647 } 2648 case T_INT: { 2649 jint c = con; 2650 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2651 break; 2652 } 2653 case T_LONG: { 2654 jlong c = con; 2655 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2656 break; 2657 } 2658 case T_FLOAT: { 2659 jfloat c = con; 2660 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2661 break; 2662 } 2663 case T_DOUBLE: { 2664 jdouble c = con; 2665 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2666 break; 2667 } 2668 default: assert(false, "%s", type2name(bt)); 2669 } 2670 } 2671 
return val; 2672 } 2673 2674 static inline jlong high_bit_set(BasicType bt) { 2675 switch (bt) { 2676 case T_BYTE: return 0x8080808080808080; 2677 case T_SHORT: return 0x8000800080008000; 2678 case T_INT: return 0x8000000080000000; 2679 case T_LONG: return 0x8000000000000000; 2680 default: 2681 ShouldNotReachHere(); 2682 return 0; 2683 } 2684 } 2685 2686 #ifndef PRODUCT 2687 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2688 st->print("nop \t# %d bytes pad for loops and calls", _count); 2689 } 2690 #endif 2691 2692 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2693 __ nop(_count); 2694 } 2695 2696 uint MachNopNode::size(PhaseRegAlloc*) const { 2697 return _count; 2698 } 2699 2700 #ifndef PRODUCT 2701 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2702 st->print("# breakpoint"); 2703 } 2704 #endif 2705 2706 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2707 __ int3(); 2708 } 2709 2710 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2711 return MachNode::size(ra_); 2712 } 2713 2714 %} 2715 2716 encode %{ 2717 2718 enc_class call_epilog %{ 2719 if (VerifyStackAtCalls) { 2720 // Check that stack depth is unchanged: find majik cookie on stack 2721 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2722 Label L; 2723 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2724 __ jccb(Assembler::equal, L); 2725 // Die if stack mismatch 2726 __ int3(); 2727 __ bind(L); 2728 } 2729 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2730 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2731 // Search for the corresponding projection, get the register and emit code that initialized it. 2732 uint con = (tf()->range_cc()->cnt() - 1); 2733 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2734 ProjNode* proj = fast_out(i)->as_Proj(); 2735 if (proj->_con == con) { 2736 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2737 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2738 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2739 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2740 __ testq(rax, rax); 2741 __ setb(Assembler::notZero, toReg); 2742 __ movzbl(toReg, toReg); 2743 if (reg->is_stack()) { 2744 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2745 __ movq(Address(rsp, st_off), toReg); 2746 } 2747 break; 2748 } 2749 } 2750 if (return_value_is_used()) { 2751 // An inline type is returned as fields in multiple registers. 2752 // Rax either contains an oop if the inline type is buffered or a pointer 2753 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2754 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2755 // rax &= (rax & 1) - 1 2756 __ movptr(rscratch1, rax); 2757 __ andptr(rscratch1, 0x1); 2758 __ subptr(rscratch1, 0x1); 2759 __ andptr(rax, rscratch1); 2760 } 2761 } 2762 %} 2763 2764 %} 2765 2766 // Operands for bound floating pointer register arguments 2767 operand rxmm0() %{ 2768 constraint(ALLOC_IN_RC(xmm0_reg)); 2769 match(VecX); 2770 format%{%} 2771 interface(REG_INTER); 2772 %} 2773 2774 //----------OPERANDS----------------------------------------------------------- 2775 // Operand definitions must precede instruction definitions for correct parsing 2776 // in the ADLC because operands constitute user defined types which are used in 2777 // instruction definitions. 2778 2779 // Vectors 2780 2781 // Dummy generic vector class. Should be used for all vector operands. 2782 // Replaced with vec[SDXYZ] during post-selection pass. 2783 operand vec() %{ 2784 constraint(ALLOC_IN_RC(dynamic)); 2785 match(VecX); 2786 match(VecY); 2787 match(VecZ); 2788 match(VecS); 2789 match(VecD); 2790 2791 format %{ %} 2792 interface(REG_INTER); 2793 %} 2794 2795 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2796 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2797 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2798 // runtime code generation via reg_class_dynamic. 2799 operand legVec() %{ 2800 constraint(ALLOC_IN_RC(dynamic)); 2801 match(VecX); 2802 match(VecY); 2803 match(VecZ); 2804 match(VecS); 2805 match(VecD); 2806 2807 format %{ %} 2808 interface(REG_INTER); 2809 %} 2810 2811 // Replaces vec during post-selection cleanup. See above. 2812 operand vecS() %{ 2813 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2814 match(VecS); 2815 2816 format %{ %} 2817 interface(REG_INTER); 2818 %} 2819 2820 // Replaces legVec during post-selection cleanup. See above. 2821 operand legVecS() %{ 2822 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2823 match(VecS); 2824 2825 format %{ %} 2826 interface(REG_INTER); 2827 %} 2828 2829 // Replaces vec during post-selection cleanup. See above. 2830 operand vecD() %{ 2831 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2832 match(VecD); 2833 2834 format %{ %} 2835 interface(REG_INTER); 2836 %} 2837 2838 // Replaces legVec during post-selection cleanup. See above. 2839 operand legVecD() %{ 2840 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2841 match(VecD); 2842 2843 format %{ %} 2844 interface(REG_INTER); 2845 %} 2846 2847 // Replaces vec during post-selection cleanup. See above. 2848 operand vecX() %{ 2849 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2850 match(VecX); 2851 2852 format %{ %} 2853 interface(REG_INTER); 2854 %} 2855 2856 // Replaces legVec during post-selection cleanup. See above. 2857 operand legVecX() %{ 2858 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2859 match(VecX); 2860 2861 format %{ %} 2862 interface(REG_INTER); 2863 %} 2864 2865 // Replaces vec during post-selection cleanup. See above. 2866 operand vecY() %{ 2867 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2868 match(VecY); 2869 2870 format %{ %} 2871 interface(REG_INTER); 2872 %} 2873 2874 // Replaces legVec during post-selection cleanup. See above. 2875 operand legVecY() %{ 2876 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2877 match(VecY); 2878 2879 format %{ %} 2880 interface(REG_INTER); 2881 %} 2882 2883 // Replaces vec during post-selection cleanup. See above. 
2884 operand vecZ() %{ 2885 constraint(ALLOC_IN_RC(vectorz_reg)); 2886 match(VecZ); 2887 2888 format %{ %} 2889 interface(REG_INTER); 2890 %} 2891 2892 // Replaces legVec during post-selection cleanup. See above. 2893 operand legVecZ() %{ 2894 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2895 match(VecZ); 2896 2897 format %{ %} 2898 interface(REG_INTER); 2899 %} 2900 2901 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2902 2903 // ============================================================================ 2904 2905 instruct ShouldNotReachHere() %{ 2906 match(Halt); 2907 format %{ "stop\t# ShouldNotReachHere" %} 2908 ins_encode %{ 2909 if (is_reachable()) { 2910 const char* str = __ code_string(_halt_reason); 2911 __ stop(str); 2912 } 2913 %} 2914 ins_pipe(pipe_slow); 2915 %} 2916 2917 // ============================================================================ 2918 2919 instruct addF_reg(regF dst, regF src) %{ 2920 predicate((UseSSE>=1) && (UseAVX == 0)); 2921 match(Set dst (AddF dst src)); 2922 2923 format %{ "addss $dst, $src" %} 2924 ins_cost(150); 2925 ins_encode %{ 2926 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2927 %} 2928 ins_pipe(pipe_slow); 2929 %} 2930 2931 instruct addF_mem(regF dst, memory src) %{ 2932 predicate((UseSSE>=1) && (UseAVX == 0)); 2933 match(Set dst (AddF dst (LoadF src))); 2934 2935 format %{ "addss $dst, $src" %} 2936 ins_cost(150); 2937 ins_encode %{ 2938 __ addss($dst$$XMMRegister, $src$$Address); 2939 %} 2940 ins_pipe(pipe_slow); 2941 %} 2942 2943 instruct addF_imm(regF dst, immF con) %{ 2944 predicate((UseSSE>=1) && (UseAVX == 0)); 2945 match(Set dst (AddF dst con)); 2946 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2947 ins_cost(150); 2948 ins_encode %{ 2949 __ addss($dst$$XMMRegister, $constantaddress($con)); 2950 %} 2951 ins_pipe(pipe_slow); 2952 %} 2953 2954 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2955 predicate(UseAVX > 0); 2956 match(Set dst (AddF src1 src2)); 2957 2958 format %{ "vaddss $dst, $src1, $src2" %} 2959 ins_cost(150); 2960 ins_encode %{ 2961 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2962 %} 2963 ins_pipe(pipe_slow); 2964 %} 2965 2966 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2967 predicate(UseAVX > 0); 2968 match(Set dst (AddF src1 (LoadF src2))); 2969 2970 format %{ "vaddss $dst, $src1, $src2" %} 2971 ins_cost(150); 2972 ins_encode %{ 2973 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2974 %} 2975 ins_pipe(pipe_slow); 2976 %} 2977 2978 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2979 predicate(UseAVX > 0); 2980 match(Set dst (AddF src con)); 2981 2982 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2983 ins_cost(150); 2984 ins_encode %{ 2985 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2986 %} 2987 ins_pipe(pipe_slow); 2988 %} 2989 2990 instruct addD_reg(regD dst, regD src) %{ 2991 predicate((UseSSE>=2) && (UseAVX == 0)); 2992 match(Set dst (AddD dst src)); 2993 2994 format %{ "addsd $dst, $src" %} 2995 ins_cost(150); 2996 ins_encode %{ 2997 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2998 %} 2999 ins_pipe(pipe_slow); 3000 %} 3001 3002 instruct addD_mem(regD dst, memory src) %{ 3003 predicate((UseSSE>=2) && (UseAVX == 0)); 3004 match(Set dst (AddD dst (LoadD src))); 3005 3006 format %{ "addsd $dst, $src" %} 3007 ins_cost(150); 3008 ins_encode %{ 3009 __ addsd($dst$$XMMRegister, $src$$Address); 
3010 %} 3011 ins_pipe(pipe_slow); 3012 %} 3013 3014 instruct addD_imm(regD dst, immD con) %{ 3015 predicate((UseSSE>=2) && (UseAVX == 0)); 3016 match(Set dst (AddD dst con)); 3017 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3018 ins_cost(150); 3019 ins_encode %{ 3020 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3021 %} 3022 ins_pipe(pipe_slow); 3023 %} 3024 3025 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3026 predicate(UseAVX > 0); 3027 match(Set dst (AddD src1 src2)); 3028 3029 format %{ "vaddsd $dst, $src1, $src2" %} 3030 ins_cost(150); 3031 ins_encode %{ 3032 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3033 %} 3034 ins_pipe(pipe_slow); 3035 %} 3036 3037 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3038 predicate(UseAVX > 0); 3039 match(Set dst (AddD src1 (LoadD src2))); 3040 3041 format %{ "vaddsd $dst, $src1, $src2" %} 3042 ins_cost(150); 3043 ins_encode %{ 3044 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3045 %} 3046 ins_pipe(pipe_slow); 3047 %} 3048 3049 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3050 predicate(UseAVX > 0); 3051 match(Set dst (AddD src con)); 3052 3053 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3054 ins_cost(150); 3055 ins_encode %{ 3056 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3057 %} 3058 ins_pipe(pipe_slow); 3059 %} 3060 3061 instruct subF_reg(regF dst, regF src) %{ 3062 predicate((UseSSE>=1) && (UseAVX == 0)); 3063 match(Set dst (SubF dst src)); 3064 3065 format %{ "subss $dst, $src" %} 3066 ins_cost(150); 3067 ins_encode %{ 3068 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3069 %} 3070 ins_pipe(pipe_slow); 3071 %} 3072 3073 instruct subF_mem(regF dst, memory src) %{ 3074 predicate((UseSSE>=1) && (UseAVX == 0)); 3075 match(Set dst (SubF dst (LoadF src))); 3076 3077 format %{ "subss $dst, $src" %} 3078 ins_cost(150); 3079 ins_encode %{ 3080 __ subss($dst$$XMMRegister, $src$$Address); 3081 %} 3082 ins_pipe(pipe_slow); 3083 %} 3084 3085 instruct subF_imm(regF dst, immF con) %{ 3086 predicate((UseSSE>=1) && (UseAVX == 0)); 3087 match(Set dst (SubF dst con)); 3088 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3089 ins_cost(150); 3090 ins_encode %{ 3091 __ subss($dst$$XMMRegister, $constantaddress($con)); 3092 %} 3093 ins_pipe(pipe_slow); 3094 %} 3095 3096 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3097 predicate(UseAVX > 0); 3098 match(Set dst (SubF src1 src2)); 3099 3100 format %{ "vsubss $dst, $src1, $src2" %} 3101 ins_cost(150); 3102 ins_encode %{ 3103 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3104 %} 3105 ins_pipe(pipe_slow); 3106 %} 3107 3108 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3109 predicate(UseAVX > 0); 3110 match(Set dst (SubF src1 (LoadF src2))); 3111 3112 format %{ "vsubss $dst, $src1, $src2" %} 3113 ins_cost(150); 3114 ins_encode %{ 3115 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3116 %} 3117 ins_pipe(pipe_slow); 3118 %} 3119 3120 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3121 predicate(UseAVX > 0); 3122 match(Set dst (SubF src con)); 3123 3124 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3125 ins_cost(150); 3126 ins_encode %{ 3127 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3128 %} 3129 ins_pipe(pipe_slow); 3130 %} 3131 
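// Note on the *_imm rules above and below: the SSE/AVX scalar FP instructions have no
// immediate encoding, so the float/double constant is emitted into the constant table
// and referenced through a [$constantaddress] memory operand instead.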
3132 instruct subD_reg(regD dst, regD src) %{ 3133 predicate((UseSSE>=2) && (UseAVX == 0)); 3134 match(Set dst (SubD dst src)); 3135 3136 format %{ "subsd $dst, $src" %} 3137 ins_cost(150); 3138 ins_encode %{ 3139 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3140 %} 3141 ins_pipe(pipe_slow); 3142 %} 3143 3144 instruct subD_mem(regD dst, memory src) %{ 3145 predicate((UseSSE>=2) && (UseAVX == 0)); 3146 match(Set dst (SubD dst (LoadD src))); 3147 3148 format %{ "subsd $dst, $src" %} 3149 ins_cost(150); 3150 ins_encode %{ 3151 __ subsd($dst$$XMMRegister, $src$$Address); 3152 %} 3153 ins_pipe(pipe_slow); 3154 %} 3155 3156 instruct subD_imm(regD dst, immD con) %{ 3157 predicate((UseSSE>=2) && (UseAVX == 0)); 3158 match(Set dst (SubD dst con)); 3159 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3160 ins_cost(150); 3161 ins_encode %{ 3162 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3163 %} 3164 ins_pipe(pipe_slow); 3165 %} 3166 3167 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3168 predicate(UseAVX > 0); 3169 match(Set dst (SubD src1 src2)); 3170 3171 format %{ "vsubsd $dst, $src1, $src2" %} 3172 ins_cost(150); 3173 ins_encode %{ 3174 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3175 %} 3176 ins_pipe(pipe_slow); 3177 %} 3178 3179 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3180 predicate(UseAVX > 0); 3181 match(Set dst (SubD src1 (LoadD src2))); 3182 3183 format %{ "vsubsd $dst, $src1, $src2" %} 3184 ins_cost(150); 3185 ins_encode %{ 3186 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3187 %} 3188 ins_pipe(pipe_slow); 3189 %} 3190 3191 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3192 predicate(UseAVX > 0); 3193 match(Set dst (SubD src con)); 3194 3195 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3196 ins_cost(150); 3197 ins_encode %{ 3198 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3199 %} 3200 ins_pipe(pipe_slow); 3201 %} 3202 3203 instruct mulF_reg(regF dst, regF src) %{ 3204 predicate((UseSSE>=1) && (UseAVX == 0)); 3205 match(Set dst (MulF dst src)); 3206 3207 format %{ "mulss $dst, $src" %} 3208 ins_cost(150); 3209 ins_encode %{ 3210 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3211 %} 3212 ins_pipe(pipe_slow); 3213 %} 3214 3215 instruct mulF_mem(regF dst, memory src) %{ 3216 predicate((UseSSE>=1) && (UseAVX == 0)); 3217 match(Set dst (MulF dst (LoadF src))); 3218 3219 format %{ "mulss $dst, $src" %} 3220 ins_cost(150); 3221 ins_encode %{ 3222 __ mulss($dst$$XMMRegister, $src$$Address); 3223 %} 3224 ins_pipe(pipe_slow); 3225 %} 3226 3227 instruct mulF_imm(regF dst, immF con) %{ 3228 predicate((UseSSE>=1) && (UseAVX == 0)); 3229 match(Set dst (MulF dst con)); 3230 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3231 ins_cost(150); 3232 ins_encode %{ 3233 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3234 %} 3235 ins_pipe(pipe_slow); 3236 %} 3237 3238 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3239 predicate(UseAVX > 0); 3240 match(Set dst (MulF src1 src2)); 3241 3242 format %{ "vmulss $dst, $src1, $src2" %} 3243 ins_cost(150); 3244 ins_encode %{ 3245 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3246 %} 3247 ins_pipe(pipe_slow); 3248 %} 3249 3250 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3251 predicate(UseAVX > 0); 3252 match(Set dst (MulF src1 (LoadF src2))); 3253 3254 format %{ 
"vmulss $dst, $src1, $src2" %} 3255 ins_cost(150); 3256 ins_encode %{ 3257 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3258 %} 3259 ins_pipe(pipe_slow); 3260 %} 3261 3262 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3263 predicate(UseAVX > 0); 3264 match(Set dst (MulF src con)); 3265 3266 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3267 ins_cost(150); 3268 ins_encode %{ 3269 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3270 %} 3271 ins_pipe(pipe_slow); 3272 %} 3273 3274 instruct mulD_reg(regD dst, regD src) %{ 3275 predicate((UseSSE>=2) && (UseAVX == 0)); 3276 match(Set dst (MulD dst src)); 3277 3278 format %{ "mulsd $dst, $src" %} 3279 ins_cost(150); 3280 ins_encode %{ 3281 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3282 %} 3283 ins_pipe(pipe_slow); 3284 %} 3285 3286 instruct mulD_mem(regD dst, memory src) %{ 3287 predicate((UseSSE>=2) && (UseAVX == 0)); 3288 match(Set dst (MulD dst (LoadD src))); 3289 3290 format %{ "mulsd $dst, $src" %} 3291 ins_cost(150); 3292 ins_encode %{ 3293 __ mulsd($dst$$XMMRegister, $src$$Address); 3294 %} 3295 ins_pipe(pipe_slow); 3296 %} 3297 3298 instruct mulD_imm(regD dst, immD con) %{ 3299 predicate((UseSSE>=2) && (UseAVX == 0)); 3300 match(Set dst (MulD dst con)); 3301 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3302 ins_cost(150); 3303 ins_encode %{ 3304 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3305 %} 3306 ins_pipe(pipe_slow); 3307 %} 3308 3309 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3310 predicate(UseAVX > 0); 3311 match(Set dst (MulD src1 src2)); 3312 3313 format %{ "vmulsd $dst, $src1, $src2" %} 3314 ins_cost(150); 3315 ins_encode %{ 3316 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3317 %} 3318 ins_pipe(pipe_slow); 3319 %} 3320 3321 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3322 predicate(UseAVX > 0); 3323 match(Set dst (MulD src1 (LoadD src2))); 3324 3325 format %{ "vmulsd $dst, $src1, $src2" %} 3326 ins_cost(150); 3327 ins_encode %{ 3328 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3329 %} 3330 ins_pipe(pipe_slow); 3331 %} 3332 3333 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3334 predicate(UseAVX > 0); 3335 match(Set dst (MulD src con)); 3336 3337 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3338 ins_cost(150); 3339 ins_encode %{ 3340 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3341 %} 3342 ins_pipe(pipe_slow); 3343 %} 3344 3345 instruct divF_reg(regF dst, regF src) %{ 3346 predicate((UseSSE>=1) && (UseAVX == 0)); 3347 match(Set dst (DivF dst src)); 3348 3349 format %{ "divss $dst, $src" %} 3350 ins_cost(150); 3351 ins_encode %{ 3352 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3353 %} 3354 ins_pipe(pipe_slow); 3355 %} 3356 3357 instruct divF_mem(regF dst, memory src) %{ 3358 predicate((UseSSE>=1) && (UseAVX == 0)); 3359 match(Set dst (DivF dst (LoadF src))); 3360 3361 format %{ "divss $dst, $src" %} 3362 ins_cost(150); 3363 ins_encode %{ 3364 __ divss($dst$$XMMRegister, $src$$Address); 3365 %} 3366 ins_pipe(pipe_slow); 3367 %} 3368 3369 instruct divF_imm(regF dst, immF con) %{ 3370 predicate((UseSSE>=1) && (UseAVX == 0)); 3371 match(Set dst (DivF dst con)); 3372 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3373 ins_cost(150); 3374 ins_encode %{ 3375 __ 
divss($dst$$XMMRegister, $constantaddress($con)); 3376 %} 3377 ins_pipe(pipe_slow); 3378 %} 3379 3380 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3381 predicate(UseAVX > 0); 3382 match(Set dst (DivF src1 src2)); 3383 3384 format %{ "vdivss $dst, $src1, $src2" %} 3385 ins_cost(150); 3386 ins_encode %{ 3387 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3388 %} 3389 ins_pipe(pipe_slow); 3390 %} 3391 3392 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3393 predicate(UseAVX > 0); 3394 match(Set dst (DivF src1 (LoadF src2))); 3395 3396 format %{ "vdivss $dst, $src1, $src2" %} 3397 ins_cost(150); 3398 ins_encode %{ 3399 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3400 %} 3401 ins_pipe(pipe_slow); 3402 %} 3403 3404 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3405 predicate(UseAVX > 0); 3406 match(Set dst (DivF src con)); 3407 3408 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3409 ins_cost(150); 3410 ins_encode %{ 3411 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3412 %} 3413 ins_pipe(pipe_slow); 3414 %} 3415 3416 instruct divD_reg(regD dst, regD src) %{ 3417 predicate((UseSSE>=2) && (UseAVX == 0)); 3418 match(Set dst (DivD dst src)); 3419 3420 format %{ "divsd $dst, $src" %} 3421 ins_cost(150); 3422 ins_encode %{ 3423 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3424 %} 3425 ins_pipe(pipe_slow); 3426 %} 3427 3428 instruct divD_mem(regD dst, memory src) %{ 3429 predicate((UseSSE>=2) && (UseAVX == 0)); 3430 match(Set dst (DivD dst (LoadD src))); 3431 3432 format %{ "divsd $dst, $src" %} 3433 ins_cost(150); 3434 ins_encode %{ 3435 __ divsd($dst$$XMMRegister, $src$$Address); 3436 %} 3437 ins_pipe(pipe_slow); 3438 %} 3439 3440 instruct divD_imm(regD dst, immD con) %{ 3441 predicate((UseSSE>=2) && (UseAVX == 0)); 3442 match(Set dst (DivD dst con)); 3443 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3444 ins_cost(150); 3445 ins_encode %{ 3446 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3447 %} 3448 ins_pipe(pipe_slow); 3449 %} 3450 3451 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3452 predicate(UseAVX > 0); 3453 match(Set dst (DivD src1 src2)); 3454 3455 format %{ "vdivsd $dst, $src1, $src2" %} 3456 ins_cost(150); 3457 ins_encode %{ 3458 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3459 %} 3460 ins_pipe(pipe_slow); 3461 %} 3462 3463 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3464 predicate(UseAVX > 0); 3465 match(Set dst (DivD src1 (LoadD src2))); 3466 3467 format %{ "vdivsd $dst, $src1, $src2" %} 3468 ins_cost(150); 3469 ins_encode %{ 3470 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3471 %} 3472 ins_pipe(pipe_slow); 3473 %} 3474 3475 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3476 predicate(UseAVX > 0); 3477 match(Set dst (DivD src con)); 3478 3479 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3480 ins_cost(150); 3481 ins_encode %{ 3482 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3483 %} 3484 ins_pipe(pipe_slow); 3485 %} 3486 3487 instruct absF_reg(regF dst) %{ 3488 predicate((UseSSE>=1) && (UseAVX == 0)); 3489 match(Set dst (AbsF dst)); 3490 ins_cost(150); 3491 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3492 ins_encode %{ 3493 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3494 %} 3495 
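  // (The [0x7fffffff] pattern referenced via float_signmask() clears only the IEEE-754
  //  sign bit, leaving exponent and mantissa untouched.)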
ins_pipe(pipe_slow); 3496 %} 3497 3498 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3499 predicate(UseAVX > 0); 3500 match(Set dst (AbsF src)); 3501 ins_cost(150); 3502 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3503 ins_encode %{ 3504 int vlen_enc = Assembler::AVX_128bit; 3505 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3506 ExternalAddress(float_signmask()), vlen_enc); 3507 %} 3508 ins_pipe(pipe_slow); 3509 %} 3510 3511 instruct absD_reg(regD dst) %{ 3512 predicate((UseSSE>=2) && (UseAVX == 0)); 3513 match(Set dst (AbsD dst)); 3514 ins_cost(150); 3515 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3516 "# abs double by sign masking" %} 3517 ins_encode %{ 3518 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3519 %} 3520 ins_pipe(pipe_slow); 3521 %} 3522 3523 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3524 predicate(UseAVX > 0); 3525 match(Set dst (AbsD src)); 3526 ins_cost(150); 3527 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3528 "# abs double by sign masking" %} 3529 ins_encode %{ 3530 int vlen_enc = Assembler::AVX_128bit; 3531 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3532 ExternalAddress(double_signmask()), vlen_enc); 3533 %} 3534 ins_pipe(pipe_slow); 3535 %} 3536 3537 instruct negF_reg(regF dst) %{ 3538 predicate((UseSSE>=1) && (UseAVX == 0)); 3539 match(Set dst (NegF dst)); 3540 ins_cost(150); 3541 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3542 ins_encode %{ 3543 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3544 %} 3545 ins_pipe(pipe_slow); 3546 %} 3547 3548 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3549 predicate(UseAVX > 0); 3550 match(Set dst (NegF src)); 3551 ins_cost(150); 3552 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3553 ins_encode %{ 3554 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3555 ExternalAddress(float_signflip())); 3556 %} 3557 ins_pipe(pipe_slow); 3558 %} 3559 3560 instruct negD_reg(regD dst) %{ 3561 predicate((UseSSE>=2) && (UseAVX == 0)); 3562 match(Set dst (NegD dst)); 3563 ins_cost(150); 3564 format %{ "xorpd $dst, [0x8000000000000000]\t" 3565 "# neg double by sign flipping" %} 3566 ins_encode %{ 3567 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3568 %} 3569 ins_pipe(pipe_slow); 3570 %} 3571 3572 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3573 predicate(UseAVX > 0); 3574 match(Set dst (NegD src)); 3575 ins_cost(150); 3576 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3577 "# neg double by sign flipping" %} 3578 ins_encode %{ 3579 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3580 ExternalAddress(double_signflip())); 3581 %} 3582 ins_pipe(pipe_slow); 3583 %} 3584 3585 // sqrtss instruction needs destination register to be pre initialized for best performance 3586 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3587 instruct sqrtF_reg(regF dst) %{ 3588 predicate(UseSSE>=1); 3589 match(Set dst (SqrtF dst)); 3590 format %{ "sqrtss $dst, $dst" %} 3591 ins_encode %{ 3592 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3593 %} 3594 ins_pipe(pipe_slow); 3595 %} 3596 3597 // sqrtsd instruction needs destination register to be pre initialized for best performance 3598 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3599 instruct sqrtD_reg(regD dst) %{ 3600 predicate(UseSSE>=2); 3601 match(Set dst (SqrtD dst)); 3602 format %{ "sqrtsd $dst, $dst" %} 
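  // (Like sqrtss above, sqrtsd writes only the low element and merges the upper bits from
  //  dst, so requiring the input to already be in dst avoids a false dependency on a stale
  //  register value.)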
3603 ins_encode %{ 3604 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3605 %} 3606 ins_pipe(pipe_slow); 3607 %} 3608 3609 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3610 effect(TEMP tmp); 3611 match(Set dst (ConvF2HF src)); 3612 ins_cost(125); 3613 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3614 ins_encode %{ 3615 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3616 %} 3617 ins_pipe( pipe_slow ); 3618 %} 3619 3620 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3621 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3622 effect(TEMP ktmp, TEMP rtmp); 3623 match(Set mem (StoreC mem (ConvF2HF src))); 3624 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3625 ins_encode %{ 3626 __ movl($rtmp$$Register, 0x1); 3627 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3628 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3629 %} 3630 ins_pipe( pipe_slow ); 3631 %} 3632 3633 instruct vconvF2HF(vec dst, vec src) %{ 3634 match(Set dst (VectorCastF2HF src)); 3635 format %{ "vector_conv_F2HF $dst $src" %} 3636 ins_encode %{ 3637 int vlen_enc = vector_length_encoding(this, $src); 3638 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3639 %} 3640 ins_pipe( pipe_slow ); 3641 %} 3642 3643 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3644 predicate(n->as_StoreVector()->memory_size() >= 16); 3645 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3646 format %{ "vcvtps2ph $mem,$src" %} 3647 ins_encode %{ 3648 int vlen_enc = vector_length_encoding(this, $src); 3649 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3650 %} 3651 ins_pipe( pipe_slow ); 3652 %} 3653 3654 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3655 match(Set dst (ConvHF2F src)); 3656 format %{ "vcvtph2ps $dst,$src" %} 3657 ins_encode %{ 3658 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3659 %} 3660 ins_pipe( pipe_slow ); 3661 %} 3662 3663 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3664 match(Set dst (VectorCastHF2F (LoadVector mem))); 3665 format %{ "vcvtph2ps $dst,$mem" %} 3666 ins_encode %{ 3667 int vlen_enc = vector_length_encoding(this); 3668 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3669 %} 3670 ins_pipe( pipe_slow ); 3671 %} 3672 3673 instruct vconvHF2F(vec dst, vec src) %{ 3674 match(Set dst (VectorCastHF2F src)); 3675 ins_cost(125); 3676 format %{ "vector_conv_HF2F $dst,$src" %} 3677 ins_encode %{ 3678 int vlen_enc = vector_length_encoding(this); 3679 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 // ---------------------------------------- VectorReinterpret ------------------------------------ 3685 instruct reinterpret_mask(kReg dst) %{ 3686 predicate(n->bottom_type()->isa_vectmask() && 3687 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3688 match(Set dst (VectorReinterpret dst)); 3689 ins_cost(125); 3690 format %{ "vector_reinterpret $dst\t!" 
%} 3691 ins_encode %{ 3692 // empty 3693 %} 3694 ins_pipe( pipe_slow ); 3695 %} 3696 3697 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3698 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3699 n->bottom_type()->isa_vectmask() && 3700 n->in(1)->bottom_type()->isa_vectmask() && 3701 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3702 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3703 match(Set dst (VectorReinterpret src)); 3704 effect(TEMP xtmp); 3705 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3706 ins_encode %{ 3707 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3708 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3709 assert(src_sz == dst_sz , "src and dst size mismatch"); 3710 int vlen_enc = vector_length_encoding(src_sz); 3711 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3712 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3713 %} 3714 ins_pipe( pipe_slow ); 3715 %} 3716 3717 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3718 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3719 n->bottom_type()->isa_vectmask() && 3720 n->in(1)->bottom_type()->isa_vectmask() && 3721 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3722 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3723 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3724 match(Set dst (VectorReinterpret src)); 3725 effect(TEMP xtmp); 3726 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3727 ins_encode %{ 3728 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3729 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3730 assert(src_sz == dst_sz , "src and dst size mismatch"); 3731 int vlen_enc = vector_length_encoding(src_sz); 3732 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3733 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3734 %} 3735 ins_pipe( pipe_slow ); 3736 %} 3737 3738 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3739 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3740 n->bottom_type()->isa_vectmask() && 3741 n->in(1)->bottom_type()->isa_vectmask() && 3742 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3743 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3744 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3745 match(Set dst (VectorReinterpret src)); 3746 effect(TEMP xtmp); 3747 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3748 ins_encode %{ 3749 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3750 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3751 assert(src_sz == dst_sz , "src and dst size mismatch"); 3752 int vlen_enc = vector_length_encoding(src_sz); 3753 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3754 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3755 %} 3756 ins_pipe( pipe_slow ); 3757 %} 3758 3759 instruct reinterpret(vec dst) %{ 3760 predicate(!n->bottom_type()->isa_vectmask() && 3761 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3762 match(Set dst (VectorReinterpret dst)); 3763 ins_cost(125); 3764 format %{ "vector_reinterpret $dst\t!" %} 3765 ins_encode %{ 3766 // empty 3767 %} 3768 ins_pipe( pipe_slow ); 3769 %} 3770 3771 instruct reinterpret_expand(vec dst, vec src) %{ 3772 predicate(UseAVX == 0 && 3773 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3774 match(Set dst (VectorReinterpret src)); 3775 ins_cost(125); 3776 effect(TEMP dst); 3777 format %{ "vector_reinterpret_expand $dst,$src" %} 3778 ins_encode %{ 3779 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3780 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3781 3782 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3783 if (src_vlen_in_bytes == 4) { 3784 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3785 } else { 3786 assert(src_vlen_in_bytes == 8, ""); 3787 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3788 } 3789 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3790 %} 3791 ins_pipe( pipe_slow ); 3792 %} 3793 3794 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3795 predicate(UseAVX > 0 && 3796 !n->bottom_type()->isa_vectmask() && 3797 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3798 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3799 match(Set dst (VectorReinterpret src)); 3800 ins_cost(125); 3801 format %{ "vector_reinterpret_expand $dst,$src" %} 3802 ins_encode %{ 3803 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3804 %} 3805 ins_pipe( pipe_slow ); 3806 %} 3807 3808 3809 instruct vreinterpret_expand(legVec dst, vec src) %{ 3810 predicate(UseAVX > 0 && 3811 !n->bottom_type()->isa_vectmask() && 3812 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3813 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3814 match(Set dst (VectorReinterpret src)); 3815 ins_cost(125); 3816 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3817 ins_encode %{ 3818 switch (Matcher::vector_length_in_bytes(this, $src)) { 3819 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3820 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3821 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3822 default: ShouldNotReachHere(); 3823 } 3824 %} 3825 ins_pipe( pipe_slow ); 3826 %} 3827 3828 instruct reinterpret_shrink(vec dst, legVec src) %{ 3829 predicate(!n->bottom_type()->isa_vectmask() && 3830 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3831 match(Set dst (VectorReinterpret src)); 3832 ins_cost(125); 3833 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3834 ins_encode %{ 3835 switch (Matcher::vector_length_in_bytes(this)) { 3836 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3837 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3838 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3839 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3840 default: ShouldNotReachHere(); 3841 } 3842 %} 3843 ins_pipe( pipe_slow ); 3844 %} 3845 3846 // ---------------------------------------------------------------------------------------------------- 3847 3848 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3849 match(Set dst (RoundDoubleMode src rmode)); 3850 format %{ "roundsd $dst,$src" %} 3851 ins_cost(150); 3852 ins_encode %{ 3853 assert(UseSSE >= 4, "required"); 3854 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3855 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3856 } 3857 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3858 %} 3859 ins_pipe(pipe_slow); 3860 %} 3861 3862 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3863 match(Set dst (RoundDoubleMode con rmode)); 3864 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3865 ins_cost(150); 3866 ins_encode %{ 3867 assert(UseSSE >= 4, "required"); 3868 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3869 %} 3870 ins_pipe(pipe_slow); 3871 %} 3872 3873 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3874 predicate(Matcher::vector_length(n) < 8); 3875 match(Set dst (RoundDoubleModeV src rmode)); 3876 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3877 ins_encode %{ 3878 assert(UseAVX > 0, "required"); 3879 int vlen_enc = vector_length_encoding(this); 3880 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3881 %} 3882 ins_pipe( pipe_slow ); 3883 %} 3884 3885 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3886 predicate(Matcher::vector_length(n) == 8); 3887 match(Set dst (RoundDoubleModeV src rmode)); 3888 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3889 ins_encode %{ 3890 assert(UseAVX > 2, "required"); 3891 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3892 %} 3893 ins_pipe( pipe_slow ); 3894 %} 3895 3896 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3897 predicate(Matcher::vector_length(n) < 8); 3898 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3899 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3900 ins_encode %{ 3901 assert(UseAVX > 0, "required"); 3902 int vlen_enc = vector_length_encoding(this); 3903 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3904 %} 3905 ins_pipe( pipe_slow ); 3906 %} 3907 3908 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3909 predicate(Matcher::vector_length(n) == 8); 3910 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3911 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3912 ins_encode %{ 3913 assert(UseAVX > 2, "required"); 3914 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3915 %} 3916 ins_pipe( pipe_slow ); 3917 %} 3918 3919 instruct onspinwait() %{ 3920 match(OnSpinWait); 3921 ins_cost(200); 3922 3923 format %{ 3924 $$template 3925 $$emit$$"pause\t! 
membar_onspinwait" 3926 %} 3927 ins_encode %{ 3928 __ pause(); 3929 %} 3930 ins_pipe(pipe_slow); 3931 %} 3932 3933 // a * b + c 3934 instruct fmaD_reg(regD a, regD b, regD c) %{ 3935 match(Set c (FmaD c (Binary a b))); 3936 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3937 ins_cost(150); 3938 ins_encode %{ 3939 assert(UseFMA, "Needs FMA instructions support."); 3940 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3941 %} 3942 ins_pipe( pipe_slow ); 3943 %} 3944 3945 // a * b + c 3946 instruct fmaF_reg(regF a, regF b, regF c) %{ 3947 match(Set c (FmaF c (Binary a b))); 3948 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3949 ins_cost(150); 3950 ins_encode %{ 3951 assert(UseFMA, "Needs FMA instructions support."); 3952 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3953 %} 3954 ins_pipe( pipe_slow ); 3955 %} 3956 3957 // ====================VECTOR INSTRUCTIONS===================================== 3958 3959 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3960 instruct MoveVec2Leg(legVec dst, vec src) %{ 3961 match(Set dst src); 3962 format %{ "" %} 3963 ins_encode %{ 3964 ShouldNotReachHere(); 3965 %} 3966 ins_pipe( fpu_reg_reg ); 3967 %} 3968 3969 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3970 match(Set dst src); 3971 format %{ "" %} 3972 ins_encode %{ 3973 ShouldNotReachHere(); 3974 %} 3975 ins_pipe( fpu_reg_reg ); 3976 %} 3977 3978 // ============================================================================ 3979 3980 // Load vectors generic operand pattern 3981 instruct loadV(vec dst, memory mem) %{ 3982 match(Set dst (LoadVector mem)); 3983 ins_cost(125); 3984 format %{ "load_vector $dst,$mem" %} 3985 ins_encode %{ 3986 BasicType bt = Matcher::vector_element_basic_type(this); 3987 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3988 %} 3989 ins_pipe( pipe_slow ); 3990 %} 3991 3992 // Store vectors generic operand pattern. 3993 instruct storeV(memory mem, vec src) %{ 3994 match(Set mem (StoreVector mem src)); 3995 ins_cost(145); 3996 format %{ "store_vector $mem,$src\n\t" %} 3997 ins_encode %{ 3998 switch (Matcher::vector_length_in_bytes(this, $src)) { 3999 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4000 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4001 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4002 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4003 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4004 default: ShouldNotReachHere(); 4005 } 4006 %} 4007 ins_pipe( pipe_slow ); 4008 %} 4009 4010 // ---------------------------------------- Gather ------------------------------------ 4011 4012 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4013 4014 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4015 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4016 Matcher::vector_length_in_bytes(n) <= 32); 4017 match(Set dst (LoadVectorGather mem idx)); 4018 effect(TEMP dst, TEMP tmp, TEMP mask); 4019 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is first moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4074 ins_encode %{ 4075 int vlen_enc = vector_length_encoding(this); 4076 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4077 __ lea($tmp$$Register, $mem$$Address); 4078 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4079 %} 4080 ins_pipe( pipe_slow ); 4081 %} 4082 4083 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4084 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4085 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4086 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4087 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4088 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4089 ins_encode %{ 4090 int vlen_enc = vector_length_encoding(this); 4091 int vector_len = Matcher::vector_length(this); 4092 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4093 __ lea($tmp$$Register, $mem$$Address); 4094 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4095 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4096 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4097 %} 4098 ins_pipe( pipe_slow ); 4099 %} 4100 4101 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4102 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4103 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4104 effect(TEMP tmp, TEMP rtmp, KILL cr); 4105 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4106 ins_encode %{ 4107 int vlen_enc = vector_length_encoding(this); 4108 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4109 __ lea($tmp$$Register, $mem$$Address); 4110 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4111 %} 4112 ins_pipe( pipe_slow ); 4113 %} 4114 4115 4116 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4117 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4118 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4119 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4120 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4121 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4122 ins_encode %{ 4123 int vlen_enc = vector_length_encoding(this); 4124 int vector_len = Matcher::vector_length(this); 4125 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4126 __ lea($tmp$$Register, $mem$$Address); 4127 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4128 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4129 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4130 %} 4131 ins_pipe( pipe_slow ); 4132 %} 4133 4134 4135 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4136 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4137 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4138 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4139 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4140 ins_encode %{ 4141 int vlen_enc = vector_length_encoding(this); 4142 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4143 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4144 __ lea($tmp$$Register, $mem$$Address); 4145 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4146 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4147 %} 4148 ins_pipe( pipe_slow ); 4149 %} 4150 4151 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4152 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4153 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4154 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4155 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4156 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4157 ins_encode %{ 4158 int vlen_enc = vector_length_encoding(this); 4159 int vector_len = Matcher::vector_length(this); 4160 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4161 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4162 __ lea($tmp$$Register, $mem$$Address); 4163 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4164 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4165 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4166 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4167 %} 4168 ins_pipe( pipe_slow ); 4169 %} 4170 4171 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4172 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4173 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4174 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4175 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4176 ins_encode %{ 4177 int vlen_enc = vector_length_encoding(this); 4178 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4179 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4180 __ lea($tmp$$Register, $mem$$Address); 4181 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4182 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4183 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4184 %} 4185 ins_pipe( pipe_slow ); 4186 %} 4187 4188 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4189 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4190 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4191 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4192 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4193 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4194 ins_encode %{ 4195 int vlen_enc = vector_length_encoding(this); 4196 int vector_len = Matcher::vector_length(this); 4197 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4198 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4199 __ lea($tmp$$Register, $mem$$Address); 4200 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4201 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4202 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4203 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4204 %} 4205 ins_pipe( pipe_slow ); 4206 %} 4207 4208 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4209 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4210 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4211 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4212 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4213 ins_encode %{ 4214 int vlen_enc = vector_length_encoding(this); 4215 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4216 __ lea($tmp$$Register, $mem$$Address); 4217 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4218 if (elem_bt == T_SHORT) { 4219 __ movl($mask_idx$$Register, 0x55555555); 4220 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4221 } 4222 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4223 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4224 %} 4225 ins_pipe( pipe_slow ); 4226 %} 4227 4228 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4229 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4230 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4231 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4232 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4233 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4234 ins_encode %{ 4235 int vlen_enc = vector_length_encoding(this); 4236 int vector_len = Matcher::vector_length(this); 4237 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4238 __ lea($tmp$$Register, $mem$$Address); 4239 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4240 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4241 if (elem_bt == T_SHORT) { 4242 __ movl($mask_idx$$Register, 0x55555555); 4243 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4244 } 4245 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4246 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4247 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4248 %} 4249 ins_pipe( pipe_slow ); 4250 %} 4251 4252 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4253 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4254 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4255 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4256 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4257 ins_encode %{ 4258 int vlen_enc = vector_length_encoding(this); 4259 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4260 __ lea($tmp$$Register, $mem$$Address); 4261 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4262 if (elem_bt == T_SHORT) { 4263 __ movl($mask_idx$$Register, 0x55555555); 4264 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4265 } 4266 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4267 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4268 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4274 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4275 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4276 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4277 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4278 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The scatter instruction partially updates the opmask register used
    // for predication, hence the mask operand is first moved to a temporary.
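    // (AVX-512 gathers/scatters clear opmask bits as each element completes, so using
    //  $mask directly would destroy the caller-visible mask value.)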
4332 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4333 __ lea($tmp$$Register, $mem$$Address); 4334 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4335 %} 4336 ins_pipe( pipe_slow ); 4337 %} 4338 4339 // ====================REPLICATE======================================= 4340 4341 // Replicate byte scalar to be vector 4342 instruct vReplB_reg(vec dst, rRegI src) %{ 4343 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4344 match(Set dst (Replicate src)); 4345 format %{ "replicateB $dst,$src" %} 4346 ins_encode %{ 4347 uint vlen = Matcher::vector_length(this); 4348 if (UseAVX >= 2) { 4349 int vlen_enc = vector_length_encoding(this); 4350 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4351 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4352 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4353 } else { 4354 __ movdl($dst$$XMMRegister, $src$$Register); 4355 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4356 } 4357 } else { 4358 assert(UseAVX < 2, ""); 4359 __ movdl($dst$$XMMRegister, $src$$Register); 4360 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4361 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4362 if (vlen >= 16) { 4363 assert(vlen == 16, ""); 4364 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4365 } 4366 } 4367 %} 4368 ins_pipe( pipe_slow ); 4369 %} 4370 4371 instruct ReplB_mem(vec dst, memory mem) %{ 4372 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4373 match(Set dst (Replicate (LoadB mem))); 4374 format %{ "replicateB $dst,$mem" %} 4375 ins_encode %{ 4376 int vlen_enc = vector_length_encoding(this); 4377 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4378 %} 4379 ins_pipe( pipe_slow ); 4380 %} 4381 4382 // ====================ReplicateS======================================= 4383 4384 instruct vReplS_reg(vec dst, rRegI src) %{ 4385 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4386 match(Set dst (Replicate src)); 4387 format %{ "replicateS $dst,$src" %} 4388 ins_encode %{ 4389 uint vlen = Matcher::vector_length(this); 4390 int vlen_enc = vector_length_encoding(this); 4391 if (UseAVX >= 2) { 4392 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4393 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4394 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4395 } else { 4396 __ movdl($dst$$XMMRegister, $src$$Register); 4397 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4398 } 4399 } else { 4400 assert(UseAVX < 2, ""); 4401 __ movdl($dst$$XMMRegister, $src$$Register); 4402 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4403 if (vlen >= 8) { 4404 assert(vlen == 8, ""); 4405 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4406 } 4407 } 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 4412 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4413 match(Set dst (Replicate con)); 4414 effect(TEMP rtmp); 4415 format %{ "replicateHF $dst, $con \t! 
using $rtmp as TEMP" %} 4416 ins_encode %{ 4417 int vlen_enc = vector_length_encoding(this); 4418 BasicType bt = Matcher::vector_element_basic_type(this); 4419 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4420 __ movl($rtmp$$Register, $con$$constant); 4421 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4422 %} 4423 ins_pipe( pipe_slow ); 4424 %} 4425 4426 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4427 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4428 match(Set dst (Replicate src)); 4429 effect(TEMP rtmp); 4430 format %{ "replicateHF $dst, $src \t! using $rtmp as TEMP" %} 4431 ins_encode %{ 4432 int vlen_enc = vector_length_encoding(this); 4433 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4434 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4435 %} 4436 ins_pipe( pipe_slow ); 4437 %} 4438 4439 instruct ReplS_mem(vec dst, memory mem) %{ 4440 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4441 match(Set dst (Replicate (LoadS mem))); 4442 format %{ "replicateS $dst,$mem" %} 4443 ins_encode %{ 4444 int vlen_enc = vector_length_encoding(this); 4445 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4446 %} 4447 ins_pipe( pipe_slow ); 4448 %} 4449 4450 // ====================ReplicateI======================================= 4451 4452 instruct ReplI_reg(vec dst, rRegI src) %{ 4453 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4454 match(Set dst (Replicate src)); 4455 format %{ "replicateI $dst,$src" %} 4456 ins_encode %{ 4457 uint vlen = Matcher::vector_length(this); 4458 int vlen_enc = vector_length_encoding(this); 4459 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4460 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4461 } else if (VM_Version::supports_avx2()) { 4462 __ movdl($dst$$XMMRegister, $src$$Register); 4463 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4464 } else { 4465 __ movdl($dst$$XMMRegister, $src$$Register); 4466 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4467 } 4468 %} 4469 ins_pipe( pipe_slow ); 4470 %} 4471 4472 instruct ReplI_mem(vec dst, memory mem) %{ 4473 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4474 match(Set dst (Replicate (LoadI mem))); 4475 format %{ "replicateI $dst,$mem" %} 4476 ins_encode %{ 4477 int vlen_enc = vector_length_encoding(this); 4478 if (VM_Version::supports_avx2()) { 4479 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4480 } else if (VM_Version::supports_avx()) { 4481 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4482 } else { 4483 __ movdl($dst$$XMMRegister, $mem$$Address); 4484 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4485 } 4486 %} 4487 ins_pipe( pipe_slow ); 4488 %} 4489 4490 instruct ReplI_imm(vec dst, immI con) %{ 4491 predicate(Matcher::is_non_long_integral_vector(n)); 4492 match(Set dst (Replicate con)); 4493 format %{ "replicateI $dst,$con" %} 4494 ins_encode %{ 4495 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4496 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4497 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4498 BasicType bt = Matcher::vector_element_basic_type(this); 4499 int vlen = Matcher::vector_length_in_bytes(this); 4500 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4501 %} 4502 ins_pipe( pipe_slow ); 4503 %} 4504 4505 // Replicate scalar zero to be vector 4506 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4507 predicate(Matcher::is_non_long_integral_vector(n)); 4508 match(Set dst (Replicate zero)); 4509 format %{ "replicateI $dst,$zero" %} 4510 ins_encode %{ 4511 int vlen_enc = vector_length_encoding(this); 4512 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4513 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4514 } else { 4515 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4516 } 4517 %} 4518 ins_pipe( fpu_reg_reg ); 4519 %} 4520 4521 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4522 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4523 match(Set dst (Replicate con)); 4524 format %{ "vallones $dst" %} 4525 ins_encode %{ 4526 int vector_len = vector_length_encoding(this); 4527 __ vallones($dst$$XMMRegister, vector_len); 4528 %} 4529 ins_pipe( pipe_slow ); 4530 %} 4531 4532 // ====================ReplicateL======================================= 4533 4534 // Replicate long (8 byte) scalar to be vector 4535 instruct ReplL_reg(vec dst, rRegL src) %{ 4536 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4537 match(Set dst (Replicate src)); 4538 format %{ "replicateL $dst,$src" %} 4539 ins_encode %{ 4540 int vlen = Matcher::vector_length(this); 4541 int vlen_enc = vector_length_encoding(this); 4542 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4543 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4544 } else if (VM_Version::supports_avx2()) { 4545 __ movdq($dst$$XMMRegister, $src$$Register); 4546 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4547 } else { 4548 __ movdq($dst$$XMMRegister, $src$$Register); 4549 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4550 } 4551 %} 4552 ins_pipe( pipe_slow ); 4553 %} 4554 4555 instruct ReplL_mem(vec dst, memory mem) %{ 4556 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4557 match(Set dst (Replicate (LoadL mem))); 4558 format %{ "replicateL $dst,$mem" %} 4559 ins_encode %{ 4560 int vlen_enc = vector_length_encoding(this); 4561 if (VM_Version::supports_avx2()) { 4562 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4563 } else if (VM_Version::supports_sse3()) { 4564 __ movddup($dst$$XMMRegister, $mem$$Address); 4565 } else { 4566 __ movq($dst$$XMMRegister, $mem$$Address); 4567 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4568 } 4569 %} 4570 ins_pipe( pipe_slow ); 4571 %} 4572 4573 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4574 instruct ReplL_imm(vec dst, immL con) %{ 4575 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4576 match(Set dst (Replicate con)); 4577 format %{ "replicateL $dst,$con" %} 4578 ins_encode %{ 4579 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4580 int vlen = Matcher::vector_length_in_bytes(this); 4581 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4582 %} 4583 ins_pipe( pipe_slow ); 4584 %} 4585 4586 instruct ReplL_zero(vec dst, immL0 zero) %{ 4587 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4588 match(Set dst (Replicate zero)); 4589 format %{ "replicateL $dst,$zero" %} 4590 ins_encode %{ 4591 int vlen_enc = vector_length_encoding(this); 4592 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4593 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4594 } else { 4595 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4596 } 4597 %} 4598 ins_pipe( fpu_reg_reg ); 4599 %} 4600 4601 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4602 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4603 match(Set dst (Replicate con)); 4604 format %{ "vallones $dst" %} 4605 ins_encode %{ 4606 int vector_len = vector_length_encoding(this); 4607 __ vallones($dst$$XMMRegister, vector_len); 4608 %} 4609 ins_pipe( pipe_slow ); 4610 %} 4611 4612 // ====================ReplicateF======================================= 4613 4614 instruct vReplF_reg(vec dst, vlRegF src) %{ 4615 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4616 match(Set dst (Replicate src)); 4617 format %{ "replicateF $dst,$src" %} 4618 ins_encode %{ 4619 uint vlen = Matcher::vector_length(this); 4620 int vlen_enc = vector_length_encoding(this); 4621 if (vlen <= 4) { 4622 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4623 } else if (VM_Version::supports_avx2()) { 4624 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4625 } else { 4626 assert(vlen == 8, "sanity"); 4627 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4628 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4629 } 4630 %} 4631 ins_pipe( pipe_slow ); 4632 %} 4633 4634 instruct ReplF_reg(vec dst, vlRegF src) %{ 4635 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4636 match(Set dst (Replicate src)); 4637 format %{ "replicateF $dst,$src" %} 4638 ins_encode %{ 4639 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4640 %} 4641 ins_pipe( pipe_slow ); 4642 %} 4643 4644 instruct ReplF_mem(vec dst, memory mem) %{ 4645 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4646 match(Set dst (Replicate (LoadF mem))); 4647 format %{ "replicateF $dst,$mem" %} 4648 ins_encode %{ 4649 int vlen_enc = vector_length_encoding(this); 4650 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4651 %} 4652 ins_pipe( pipe_slow ); 4653 %} 4654 4655 // Replicate float scalar immediate to be vector by loading from const table. 4656 instruct ReplF_imm(vec dst, immF con) %{ 4657 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4658 match(Set dst (Replicate con)); 4659 format %{ "replicateF $dst,$con" %} 4660 ins_encode %{ 4661 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4662 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4663 int vlen = Matcher::vector_length_in_bytes(this); 4664 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4665 %} 4666 ins_pipe( pipe_slow ); 4667 %} 4668 4669 instruct ReplF_zero(vec dst, immF0 zero) %{ 4670 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4671 match(Set dst (Replicate zero)); 4672 format %{ "replicateF $dst,$zero" %} 4673 ins_encode %{ 4674 int vlen_enc = vector_length_encoding(this); 4675 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4676 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4677 } else { 4678 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4679 } 4680 %} 4681 ins_pipe( fpu_reg_reg ); 4682 %} 4683 4684 // ====================ReplicateD======================================= 4685 4686 // Replicate double (8 bytes) scalar to be vector 4687 instruct vReplD_reg(vec dst, vlRegD src) %{ 4688 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4689 match(Set dst (Replicate src)); 4690 format %{ "replicateD $dst,$src" %} 4691 ins_encode %{ 4692 uint vlen = Matcher::vector_length(this); 4693 int vlen_enc = vector_length_encoding(this); 4694 if (vlen <= 2) { 4695 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4696 } else if (VM_Version::supports_avx2()) { 4697 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4698 } else { 4699 assert(vlen == 4, "sanity"); 4700 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4701 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4702 } 4703 %} 4704 ins_pipe( pipe_slow ); 4705 %} 4706 4707 instruct ReplD_reg(vec dst, vlRegD src) %{ 4708 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4709 match(Set dst (Replicate src)); 4710 format %{ "replicateD $dst,$src" %} 4711 ins_encode %{ 4712 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4713 %} 4714 ins_pipe( pipe_slow ); 4715 %} 4716 4717 instruct ReplD_mem(vec dst, memory mem) %{ 4718 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4719 match(Set dst (Replicate (LoadD mem))); 4720 format %{ "replicateD $dst,$mem" %} 4721 ins_encode %{ 4722 if (Matcher::vector_length(this) >= 4) { 4723 int vlen_enc = vector_length_encoding(this); 4724 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4725 } else { 4726 __ movddup($dst$$XMMRegister, $mem$$Address); 4727 } 4728 %} 4729 ins_pipe( pipe_slow ); 4730 %} 4731 4732 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4733 instruct ReplD_imm(vec dst, immD con) %{ 4734 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4735 match(Set dst (Replicate con)); 4736 format %{ "replicateD $dst,$con" %} 4737 ins_encode %{ 4738 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4739 int vlen = Matcher::vector_length_in_bytes(this); 4740 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4741 %} 4742 ins_pipe( pipe_slow ); 4743 %} 4744 4745 instruct ReplD_zero(vec dst, immD0 zero) %{ 4746 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4747 match(Set dst (Replicate zero)); 4748 format %{ "replicateD $dst,$zero" %} 4749 ins_encode %{ 4750 int vlen_enc = vector_length_encoding(this); 4751 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4752 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4753 } else { 4754 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4755 } 4756 %} 4757 ins_pipe( fpu_reg_reg ); 4758 %} 4759 4760 // ====================VECTOR INSERT======================================= 4761 4762 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4763 predicate(Matcher::vector_length_in_bytes(n) < 32); 4764 match(Set dst (VectorInsert (Binary dst val) idx)); 4765 format %{ "vector_insert $dst,$val,$idx" %} 4766 ins_encode %{ 4767 assert(UseSSE >= 4, "required"); 4768 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4769 4770 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4771 4772 assert(is_integral_type(elem_bt), ""); 4773 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4774 4775 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4776 %} 4777 ins_pipe( pipe_slow ); 4778 %} 4779 4780 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4781 predicate(Matcher::vector_length_in_bytes(n) == 32); 4782 match(Set dst (VectorInsert (Binary src val) idx)); 4783 effect(TEMP vtmp); 4784 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4785 ins_encode %{ 4786 int vlen_enc = Assembler::AVX_256bit; 4787 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4788 int elem_per_lane = 16/type2aelembytes(elem_bt); 4789 int log2epr = log2(elem_per_lane); 4790 4791 assert(is_integral_type(elem_bt), "sanity"); 4792 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4793 4794 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4795 uint y_idx = ($idx$$constant >> log2epr) & 1; 4796 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4797 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4798 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4799 %} 4800 ins_pipe( pipe_slow ); 4801 %} 4802 4803 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4804 predicate(Matcher::vector_length_in_bytes(n) == 64); 4805 match(Set dst (VectorInsert (Binary src val) idx)); 4806 effect(TEMP vtmp); 4807 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4808 ins_encode %{ 4809 assert(UseAVX > 2, "sanity"); 4810 4811 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4812 int elem_per_lane = 16/type2aelembytes(elem_bt); 4813 int log2epr = log2(elem_per_lane); 4814 4815 assert(is_integral_type(elem_bt), ""); 4816 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4817 4818 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4819 uint y_idx = ($idx$$constant >> log2epr) & 3; 4820 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4821 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4822 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4823 %} 4824 ins_pipe( pipe_slow ); 4825 %} 4826 4827 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4828 predicate(Matcher::vector_length(n) == 2); 4829 match(Set dst (VectorInsert (Binary dst val) idx)); 4830 format %{ "vector_insert $dst,$val,$idx" %} 4831 ins_encode %{ 4832 assert(UseSSE >= 4, "required"); 4833 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4834 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4835 4836 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4837 %} 4838 ins_pipe( pipe_slow ); 4839 %} 4840 4841 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4842 predicate(Matcher::vector_length(n) == 4); 4843 match(Set dst (VectorInsert (Binary src val) idx)); 4844 effect(TEMP vtmp); 4845 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4846 ins_encode %{ 4847 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4848 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4849 4850 uint x_idx = $idx$$constant & right_n_bits(1); 4851 uint y_idx = ($idx$$constant >> 1) & 1; 4852 int vlen_enc = Assembler::AVX_256bit; 4853 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4854 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4855 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4856 %} 4857 ins_pipe( pipe_slow ); 4858 %} 4859 4860 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4861 predicate(Matcher::vector_length(n) == 8); 4862 match(Set dst (VectorInsert (Binary src val) idx)); 4863 effect(TEMP vtmp); 4864 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4865 ins_encode %{ 4866 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4867 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4868 4869 uint x_idx = $idx$$constant & right_n_bits(1); 4870 uint y_idx = ($idx$$constant >> 1) & 3; 4871 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4872 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4873 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4874 %} 4875 ins_pipe( pipe_slow ); 4876 %} 4877 4878 instruct insertF(vec dst, regF val, immU8 idx) %{ 4879 predicate(Matcher::vector_length(n) < 8); 4880 match(Set dst (VectorInsert (Binary dst val) idx)); 4881 format %{ "vector_insert $dst,$val,$idx" %} 4882 ins_encode %{ 4883 assert(UseSSE >= 4, "sanity"); 4884 4885 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4886 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4887 4888 uint x_idx = $idx$$constant & right_n_bits(2); 4889 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4890 %} 4891 ins_pipe( pipe_slow ); 4892 %} 4893 4894 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4895 predicate(Matcher::vector_length(n) >= 8); 4896 match(Set dst (VectorInsert (Binary src val) idx)); 4897 effect(TEMP vtmp); 4898 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4899 ins_encode %{ 4900 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4901 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4902 4903 int vlen = Matcher::vector_length(this); 4904 uint x_idx = $idx$$constant & right_n_bits(2); 4905 if (vlen == 8) { 4906 uint y_idx = ($idx$$constant >> 2) & 1; 4907 
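// The 256-bit case is handled one 128-bit lane at a time: y_idx selects the lane holding
// element $idx and x_idx is the slot inside that lane. The lane is copied out with
// vextracti128, the scalar is merged with vinsertps (imm8 bits 5:4 encode the destination
// slot, hence x_idx << 4), and the patched lane is written back into $dst with vinserti128.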
int vlen_enc = Assembler::AVX_256bit; 4908 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4909 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4910 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4911 } else { 4912 assert(vlen == 16, "sanity"); 4913 uint y_idx = ($idx$$constant >> 2) & 3; 4914 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4915 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4916 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4917 } 4918 %} 4919 ins_pipe( pipe_slow ); 4920 %} 4921 4922 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4923 predicate(Matcher::vector_length(n) == 2); 4924 match(Set dst (VectorInsert (Binary dst val) idx)); 4925 effect(TEMP tmp); 4926 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4927 ins_encode %{ 4928 assert(UseSSE >= 4, "sanity"); 4929 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4930 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4931 4932 __ movq($tmp$$Register, $val$$XMMRegister); 4933 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4934 %} 4935 ins_pipe( pipe_slow ); 4936 %} 4937 4938 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4939 predicate(Matcher::vector_length(n) == 4); 4940 match(Set dst (VectorInsert (Binary src val) idx)); 4941 effect(TEMP vtmp, TEMP tmp); 4942 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4943 ins_encode %{ 4944 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4945 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4946 4947 uint x_idx = $idx$$constant & right_n_bits(1); 4948 uint y_idx = ($idx$$constant >> 1) & 1; 4949 int vlen_enc = Assembler::AVX_256bit; 4950 __ movq($tmp$$Register, $val$$XMMRegister); 4951 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4952 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4953 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4954 %} 4955 ins_pipe( pipe_slow ); 4956 %} 4957 4958 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4959 predicate(Matcher::vector_length(n) == 8); 4960 match(Set dst (VectorInsert (Binary src val) idx)); 4961 effect(TEMP tmp, TEMP vtmp); 4962 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4963 ins_encode %{ 4964 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4965 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4966 4967 uint x_idx = $idx$$constant & right_n_bits(1); 4968 uint y_idx = ($idx$$constant >> 1) & 3; 4969 __ movq($tmp$$Register, $val$$XMMRegister); 4970 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4971 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4972 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 // ====================REDUCTION ARITHMETIC======================================= 4978 4979 // =======================Int Reduction========================================== 4980 4981 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4982 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); 
// src2 4983 match(Set dst (AddReductionVI src1 src2)); 4984 match(Set dst (MulReductionVI src1 src2)); 4985 match(Set dst (AndReductionV src1 src2)); 4986 match(Set dst ( OrReductionV src1 src2)); 4987 match(Set dst (XorReductionV src1 src2)); 4988 match(Set dst (MinReductionV src1 src2)); 4989 match(Set dst (MaxReductionV src1 src2)); 4990 effect(TEMP vtmp1, TEMP vtmp2); 4991 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4992 ins_encode %{ 4993 int opcode = this->ideal_Opcode(); 4994 int vlen = Matcher::vector_length(this, $src2); 4995 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4996 %} 4997 ins_pipe( pipe_slow ); 4998 %} 4999 5000 // =======================Long Reduction========================================== 5001 5002 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5003 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5004 match(Set dst (AddReductionVL src1 src2)); 5005 match(Set dst (MulReductionVL src1 src2)); 5006 match(Set dst (AndReductionV src1 src2)); 5007 match(Set dst ( OrReductionV src1 src2)); 5008 match(Set dst (XorReductionV src1 src2)); 5009 match(Set dst (MinReductionV src1 src2)); 5010 match(Set dst (MaxReductionV src1 src2)); 5011 effect(TEMP vtmp1, TEMP vtmp2); 5012 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5013 ins_encode %{ 5014 int opcode = this->ideal_Opcode(); 5015 int vlen = Matcher::vector_length(this, $src2); 5016 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5017 %} 5018 ins_pipe( pipe_slow ); 5019 %} 5020 5021 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5022 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5023 match(Set dst (AddReductionVL src1 src2)); 5024 match(Set dst (MulReductionVL src1 src2)); 5025 match(Set dst (AndReductionV src1 src2)); 5026 match(Set dst ( OrReductionV src1 src2)); 5027 match(Set dst (XorReductionV src1 src2)); 5028 match(Set dst (MinReductionV src1 src2)); 5029 match(Set dst (MaxReductionV src1 src2)); 5030 effect(TEMP vtmp1, TEMP vtmp2); 5031 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5032 ins_encode %{ 5033 int opcode = this->ideal_Opcode(); 5034 int vlen = Matcher::vector_length(this, $src2); 5035 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5036 %} 5037 ins_pipe( pipe_slow ); 5038 %} 5039 5040 // =======================Float Reduction========================================== 5041 5042 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5043 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5044 match(Set dst (AddReductionVF dst src)); 5045 match(Set dst (MulReductionVF dst src)); 5046 effect(TEMP dst, TEMP vtmp); 5047 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5048 ins_encode %{ 5049 int opcode = this->ideal_Opcode(); 5050 int vlen = Matcher::vector_length(this, $src); 5051 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5052 %} 5053 ins_pipe( pipe_slow ); 5054 %} 5055 5056 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5057 
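// Strict-order FP reductions must round exactly like the equivalent scalar loop, so the
// elements of $src are folded into $dst one after another rather than pairwise; the two
// XMM temporaries hold shuffled copies of $src (and its upper 128-bit lane) so each
// element can be brought down to slot 0. In effect:
//   acc = dst; for (i = 0; i < 8; i++) acc = acc OP src[i]; dst = acc;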
predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5058 match(Set dst (AddReductionVF dst src)); 5059 match(Set dst (MulReductionVF dst src)); 5060 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5061 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5062 ins_encode %{ 5063 int opcode = this->ideal_Opcode(); 5064 int vlen = Matcher::vector_length(this, $src); 5065 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5066 %} 5067 ins_pipe( pipe_slow ); 5068 %} 5069 5070 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5071 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5072 match(Set dst (AddReductionVF dst src)); 5073 match(Set dst (MulReductionVF dst src)); 5074 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5075 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5076 ins_encode %{ 5077 int opcode = this->ideal_Opcode(); 5078 int vlen = Matcher::vector_length(this, $src); 5079 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5080 %} 5081 ins_pipe( pipe_slow ); 5082 %} 5083 5084 5085 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5086 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5087 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5088 // src1 contains reduction identity 5089 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5090 match(Set dst (AddReductionVF src1 src2)); 5091 match(Set dst (MulReductionVF src1 src2)); 5092 effect(TEMP dst); 5093 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5094 ins_encode %{ 5095 int opcode = this->ideal_Opcode(); 5096 int vlen = Matcher::vector_length(this, $src2); 5097 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5098 %} 5099 ins_pipe( pipe_slow ); 5100 %} 5101 5102 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5103 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5104 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5105 // src1 contains reduction identity 5106 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5107 match(Set dst (AddReductionVF src1 src2)); 5108 match(Set dst (MulReductionVF src1 src2)); 5109 effect(TEMP dst, TEMP vtmp); 5110 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5111 ins_encode %{ 5112 int opcode = this->ideal_Opcode(); 5113 int vlen = Matcher::vector_length(this, $src2); 5114 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5115 %} 5116 ins_pipe( pipe_slow ); 5117 %} 5118 5119 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5120 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5121 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
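// Because ordering is relaxed here, the lanes can be combined pairwise (halving the
// vector on each step) instead of the element-by-element accumulation the strict-order
// rules above require; that is why these VectorAPI reductions get separate matcher rules.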
5122 // src1 contains reduction identity 5123 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5124 match(Set dst (AddReductionVF src1 src2)); 5125 match(Set dst (MulReductionVF src1 src2)); 5126 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5127 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5128 ins_encode %{ 5129 int opcode = this->ideal_Opcode(); 5130 int vlen = Matcher::vector_length(this, $src2); 5131 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5132 %} 5133 ins_pipe( pipe_slow ); 5134 %} 5135 5136 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5137 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5138 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5139 // src1 contains reduction identity 5140 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5141 match(Set dst (AddReductionVF src1 src2)); 5142 match(Set dst (MulReductionVF src1 src2)); 5143 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5144 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5145 ins_encode %{ 5146 int opcode = this->ideal_Opcode(); 5147 int vlen = Matcher::vector_length(this, $src2); 5148 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 5153 // =======================Double Reduction========================================== 5154 5155 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5156 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5157 match(Set dst (AddReductionVD dst src)); 5158 match(Set dst (MulReductionVD dst src)); 5159 effect(TEMP dst, TEMP vtmp); 5160 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5161 ins_encode %{ 5162 int opcode = this->ideal_Opcode(); 5163 int vlen = Matcher::vector_length(this, $src); 5164 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5165 %} 5166 ins_pipe( pipe_slow ); 5167 %} 5168 5169 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5170 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5171 match(Set dst (AddReductionVD dst src)); 5172 match(Set dst (MulReductionVD dst src)); 5173 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5174 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5175 ins_encode %{ 5176 int opcode = this->ideal_Opcode(); 5177 int vlen = Matcher::vector_length(this, $src); 5178 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5179 %} 5180 ins_pipe( pipe_slow ); 5181 %} 5182 5183 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5184 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5185 match(Set dst (AddReductionVD dst src)); 5186 match(Set dst (MulReductionVD dst src)); 5187 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5188 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5189 ins_encode %{ 5190 int opcode = this->ideal_Opcode(); 5191 int vlen = Matcher::vector_length(this, $src); 5192 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5193 %} 5194 ins_pipe( pipe_slow ); 5195 %} 5196 5197 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5198 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5199 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5200 // src1 contains reduction identity 5201 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5202 match(Set dst (AddReductionVD src1 src2)); 5203 match(Set dst (MulReductionVD src1 src2)); 5204 effect(TEMP dst); 5205 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5206 ins_encode %{ 5207 int opcode = this->ideal_Opcode(); 5208 int vlen = Matcher::vector_length(this, $src2); 5209 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5210 %} 5211 ins_pipe( pipe_slow ); 5212 %} 5213 5214 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5215 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5216 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5217 // src1 contains reduction identity 5218 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5219 match(Set dst (AddReductionVD src1 src2)); 5220 match(Set dst (MulReductionVD src1 src2)); 5221 effect(TEMP dst, TEMP vtmp); 5222 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5223 ins_encode %{ 5224 int opcode = this->ideal_Opcode(); 5225 int vlen = Matcher::vector_length(this, $src2); 5226 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5227 %} 5228 ins_pipe( pipe_slow ); 5229 %} 5230 5231 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5232 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5233 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5234 // src1 contains reduction identity 5235 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5236 match(Set dst (AddReductionVD src1 src2)); 5237 match(Set dst (MulReductionVD src1 src2)); 5238 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5239 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5240 ins_encode %{ 5241 int opcode = this->ideal_Opcode(); 5242 int vlen = Matcher::vector_length(this, $src2); 5243 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5244 %} 5245 ins_pipe( pipe_slow ); 5246 %} 5247 5248 // =======================Byte Reduction========================================== 5249 5250 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5251 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5252 match(Set dst (AddReductionVI src1 src2)); 5253 match(Set dst (AndReductionV src1 src2)); 5254 match(Set dst ( OrReductionV src1 src2)); 5255 match(Set dst (XorReductionV src1 src2)); 5256 match(Set dst (MinReductionV src1 src2)); 5257 match(Set dst (MaxReductionV src1 src2)); 5258 effect(TEMP vtmp1, TEMP vtmp2); 5259 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5260 ins_encode %{ 5261 int opcode = this->ideal_Opcode(); 5262 int vlen = Matcher::vector_length(this, $src2); 5263 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5264 %} 5265 ins_pipe( pipe_slow ); 5266 %} 5267 5268 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5269 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5270 match(Set dst (AddReductionVI src1 src2)); 5271 match(Set dst (AndReductionV src1 src2)); 5272 match(Set dst ( OrReductionV src1 src2)); 5273 match(Set dst (XorReductionV src1 src2)); 5274 match(Set dst (MinReductionV src1 src2)); 5275 match(Set dst (MaxReductionV src1 src2)); 5276 effect(TEMP vtmp1, TEMP vtmp2); 5277 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5278 ins_encode %{ 5279 int opcode = this->ideal_Opcode(); 5280 int vlen = Matcher::vector_length(this, $src2); 5281 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5282 %} 5283 ins_pipe( pipe_slow ); 5284 %} 5285 5286 // =======================Short Reduction========================================== 5287 5288 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5289 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5290 match(Set dst (AddReductionVI src1 src2)); 5291 match(Set dst (MulReductionVI src1 src2)); 5292 match(Set dst (AndReductionV src1 src2)); 5293 match(Set dst ( OrReductionV src1 src2)); 5294 match(Set dst (XorReductionV src1 src2)); 5295 match(Set dst (MinReductionV src1 src2)); 5296 match(Set dst (MaxReductionV src1 src2)); 5297 effect(TEMP vtmp1, TEMP vtmp2); 5298 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5299 ins_encode %{ 5300 int opcode = this->ideal_Opcode(); 5301 int vlen = Matcher::vector_length(this, $src2); 5302 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5303 %} 5304 ins_pipe( pipe_slow 
); 5305 %} 5306 5307 // =======================Mul Reduction========================================== 5308 5309 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5310 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5311 Matcher::vector_length(n->in(2)) <= 32); // src2 5312 match(Set dst (MulReductionVI src1 src2)); 5313 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5314 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5315 ins_encode %{ 5316 int opcode = this->ideal_Opcode(); 5317 int vlen = Matcher::vector_length(this, $src2); 5318 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5319 %} 5320 ins_pipe( pipe_slow ); 5321 %} 5322 5323 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5324 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5325 Matcher::vector_length(n->in(2)) == 64); // src2 5326 match(Set dst (MulReductionVI src1 src2)); 5327 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5328 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5329 ins_encode %{ 5330 int opcode = this->ideal_Opcode(); 5331 int vlen = Matcher::vector_length(this, $src2); 5332 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5333 %} 5334 ins_pipe( pipe_slow ); 5335 %} 5336 5337 //--------------------Min/Max Float Reduction -------------------- 5338 // Float Min Reduction 5339 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5340 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5341 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5342 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5343 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5344 Matcher::vector_length(n->in(2)) == 2); 5345 match(Set dst (MinReductionV src1 src2)); 5346 match(Set dst (MaxReductionV src1 src2)); 5347 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5348 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5349 ins_encode %{ 5350 assert(UseAVX > 0, "sanity"); 5351 5352 int opcode = this->ideal_Opcode(); 5353 int vlen = Matcher::vector_length(this, $src2); 5354 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5355 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5356 %} 5357 ins_pipe( pipe_slow ); 5358 %} 5359 5360 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5361 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5362 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5363 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5364 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5365 Matcher::vector_length(n->in(2)) >= 4); 5366 match(Set dst (MinReductionV src1 src2)); 5367 match(Set dst (MaxReductionV src1 src2)); 5368 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5369 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5370 ins_encode %{ 5371 assert(UseAVX > 0, "sanity"); 5372 5373 int opcode = this->ideal_Opcode(); 5374 int vlen = 
Matcher::vector_length(this, $src2); 5375 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5376 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5377 %} 5378 ins_pipe( pipe_slow ); 5379 %} 5380 5381 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5382 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5383 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5384 Matcher::vector_length(n->in(2)) == 2); 5385 match(Set dst (MinReductionV dst src)); 5386 match(Set dst (MaxReductionV dst src)); 5387 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5388 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5389 ins_encode %{ 5390 assert(UseAVX > 0, "sanity"); 5391 5392 int opcode = this->ideal_Opcode(); 5393 int vlen = Matcher::vector_length(this, $src); 5394 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5395 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5396 %} 5397 ins_pipe( pipe_slow ); 5398 %} 5399 5400 5401 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5402 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5403 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5404 Matcher::vector_length(n->in(2)) >= 4); 5405 match(Set dst (MinReductionV dst src)); 5406 match(Set dst (MaxReductionV dst src)); 5407 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5408 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5409 ins_encode %{ 5410 assert(UseAVX > 0, "sanity"); 5411 5412 int opcode = this->ideal_Opcode(); 5413 int vlen = Matcher::vector_length(this, $src); 5414 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5415 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5416 %} 5417 ins_pipe( pipe_slow ); 5418 %} 5419 5420 5421 //--------------------Min Double Reduction -------------------- 5422 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5423 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5424 rFlagsReg cr) %{ 5425 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5426 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5427 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5428 Matcher::vector_length(n->in(2)) == 2); 5429 match(Set dst (MinReductionV src1 src2)); 5430 match(Set dst (MaxReductionV src1 src2)); 5431 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5432 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5433 ins_encode %{ 5434 assert(UseAVX > 0, "sanity"); 5435 5436 int opcode = this->ideal_Opcode(); 5437 int vlen = Matcher::vector_length(this, $src2); 5438 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5439 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5440 %} 5441 ins_pipe( pipe_slow ); 5442 %} 5443 5444 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5445 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5446 rFlagsReg cr) %{ 5447 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5448 
((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5449 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5450 Matcher::vector_length(n->in(2)) >= 4); 5451 match(Set dst (MinReductionV src1 src2)); 5452 match(Set dst (MaxReductionV src1 src2)); 5453 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5454 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5455 ins_encode %{ 5456 assert(UseAVX > 0, "sanity"); 5457 5458 int opcode = this->ideal_Opcode(); 5459 int vlen = Matcher::vector_length(this, $src2); 5460 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5461 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5462 %} 5463 ins_pipe( pipe_slow ); 5464 %} 5465 5466 5467 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5468 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5469 rFlagsReg cr) %{ 5470 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5471 Matcher::vector_length(n->in(2)) == 2); 5472 match(Set dst (MinReductionV dst src)); 5473 match(Set dst (MaxReductionV dst src)); 5474 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5475 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5476 ins_encode %{ 5477 assert(UseAVX > 0, "sanity"); 5478 5479 int opcode = this->ideal_Opcode(); 5480 int vlen = Matcher::vector_length(this, $src); 5481 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5482 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5483 %} 5484 ins_pipe( pipe_slow ); 5485 %} 5486 5487 instruct minmax_reductionD_av(legRegD dst, legVec src, 5488 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5489 rFlagsReg cr) %{ 5490 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5491 Matcher::vector_length(n->in(2)) >= 4); 5492 match(Set dst (MinReductionV dst src)); 5493 match(Set dst (MaxReductionV dst src)); 5494 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5495 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5496 ins_encode %{ 5497 assert(UseAVX > 0, "sanity"); 5498 5499 int opcode = this->ideal_Opcode(); 5500 int vlen = Matcher::vector_length(this, $src); 5501 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5502 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5503 %} 5504 ins_pipe( pipe_slow ); 5505 %} 5506 5507 // ====================VECTOR ARITHMETIC======================================= 5508 5509 // --------------------------------- ADD -------------------------------------- 5510 5511 // Bytes vector add 5512 instruct vaddB(vec dst, vec src) %{ 5513 predicate(UseAVX == 0); 5514 match(Set dst (AddVB dst src)); 5515 format %{ "paddb $dst,$src\t! add packedB" %} 5516 ins_encode %{ 5517 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5518 %} 5519 ins_pipe( pipe_slow ); 5520 %} 5521 5522 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5523 predicate(UseAVX > 0); 5524 match(Set dst (AddVB src1 src2)); 5525 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5526 ins_encode %{ 5527 int vlen_enc = vector_length_encoding(this); 5528 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5529 %} 5530 ins_pipe( pipe_slow ); 5531 %} 5532 5533 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5534 predicate((UseAVX > 0) && 5535 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5536 match(Set dst (AddVB src (LoadVector mem))); 5537 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5538 ins_encode %{ 5539 int vlen_enc = vector_length_encoding(this); 5540 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5541 %} 5542 ins_pipe( pipe_slow ); 5543 %} 5544 5545 // Shorts/Chars vector add 5546 instruct vaddS(vec dst, vec src) %{ 5547 predicate(UseAVX == 0); 5548 match(Set dst (AddVS dst src)); 5549 format %{ "paddw $dst,$src\t! add packedS" %} 5550 ins_encode %{ 5551 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5552 %} 5553 ins_pipe( pipe_slow ); 5554 %} 5555 5556 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5557 predicate(UseAVX > 0); 5558 match(Set dst (AddVS src1 src2)); 5559 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5560 ins_encode %{ 5561 int vlen_enc = vector_length_encoding(this); 5562 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5563 %} 5564 ins_pipe( pipe_slow ); 5565 %} 5566 5567 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5568 predicate((UseAVX > 0) && 5569 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5570 match(Set dst (AddVS src (LoadVector mem))); 5571 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5572 ins_encode %{ 5573 int vlen_enc = vector_length_encoding(this); 5574 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5575 %} 5576 ins_pipe( pipe_slow ); 5577 %} 5578 5579 // Integers vector add 5580 instruct vaddI(vec dst, vec src) %{ 5581 predicate(UseAVX == 0); 5582 match(Set dst (AddVI dst src)); 5583 format %{ "paddd $dst,$src\t! add packedI" %} 5584 ins_encode %{ 5585 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5586 %} 5587 ins_pipe( pipe_slow ); 5588 %} 5589 5590 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5591 predicate(UseAVX > 0); 5592 match(Set dst (AddVI src1 src2)); 5593 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5594 ins_encode %{ 5595 int vlen_enc = vector_length_encoding(this); 5596 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5597 %} 5598 ins_pipe( pipe_slow ); 5599 %} 5600 5601 5602 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5603 predicate((UseAVX > 0) && 5604 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5605 match(Set dst (AddVI src (LoadVector mem))); 5606 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5607 ins_encode %{ 5608 int vlen_enc = vector_length_encoding(this); 5609 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5610 %} 5611 ins_pipe( pipe_slow ); 5612 %} 5613 5614 // Longs vector add 5615 instruct vaddL(vec dst, vec src) %{ 5616 predicate(UseAVX == 0); 5617 match(Set dst (AddVL dst src)); 5618 format %{ "paddq $dst,$src\t! add packedL" %} 5619 ins_encode %{ 5620 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5626 predicate(UseAVX > 0); 5627 match(Set dst (AddVL src1 src2)); 5628 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5629 ins_encode %{ 5630 int vlen_enc = vector_length_encoding(this); 5631 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5637 predicate((UseAVX > 0) && 5638 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5639 match(Set dst (AddVL src (LoadVector mem))); 5640 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5641 ins_encode %{ 5642 int vlen_enc = vector_length_encoding(this); 5643 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5644 %} 5645 ins_pipe( pipe_slow ); 5646 %} 5647 5648 // Floats vector add 5649 instruct vaddF(vec dst, vec src) %{ 5650 predicate(UseAVX == 0); 5651 match(Set dst (AddVF dst src)); 5652 format %{ "addps $dst,$src\t! add packedF" %} 5653 ins_encode %{ 5654 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5655 %} 5656 ins_pipe( pipe_slow ); 5657 %} 5658 5659 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5660 predicate(UseAVX > 0); 5661 match(Set dst (AddVF src1 src2)); 5662 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5663 ins_encode %{ 5664 int vlen_enc = vector_length_encoding(this); 5665 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5666 %} 5667 ins_pipe( pipe_slow ); 5668 %} 5669 5670 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5671 predicate((UseAVX > 0) && 5672 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5673 match(Set dst (AddVF src (LoadVector mem))); 5674 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5675 ins_encode %{ 5676 int vlen_enc = vector_length_encoding(this); 5677 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5678 %} 5679 ins_pipe( pipe_slow ); 5680 %} 5681 5682 // Doubles vector add 5683 instruct vaddD(vec dst, vec src) %{ 5684 predicate(UseAVX == 0); 5685 match(Set dst (AddVD dst src)); 5686 format %{ "addpd $dst,$src\t! add packedD" %} 5687 ins_encode %{ 5688 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5689 %} 5690 ins_pipe( pipe_slow ); 5691 %} 5692 5693 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5694 predicate(UseAVX > 0); 5695 match(Set dst (AddVD src1 src2)); 5696 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5697 ins_encode %{ 5698 int vlen_enc = vector_length_encoding(this); 5699 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5705 predicate((UseAVX > 0) && 5706 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5707 match(Set dst (AddVD src (LoadVector mem))); 5708 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5709 ins_encode %{ 5710 int vlen_enc = vector_length_encoding(this); 5711 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 // --------------------------------- SUB -------------------------------------- 5717 5718 // Bytes vector sub 5719 instruct vsubB(vec dst, vec src) %{ 5720 predicate(UseAVX == 0); 5721 match(Set dst (SubVB dst src)); 5722 format %{ "psubb $dst,$src\t! sub packedB" %} 5723 ins_encode %{ 5724 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5725 %} 5726 ins_pipe( pipe_slow ); 5727 %} 5728 5729 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5730 predicate(UseAVX > 0); 5731 match(Set dst (SubVB src1 src2)); 5732 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5733 ins_encode %{ 5734 int vlen_enc = vector_length_encoding(this); 5735 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5736 %} 5737 ins_pipe( pipe_slow ); 5738 %} 5739 5740 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5741 predicate((UseAVX > 0) && 5742 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5743 match(Set dst (SubVB src (LoadVector mem))); 5744 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5745 ins_encode %{ 5746 int vlen_enc = vector_length_encoding(this); 5747 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5748 %} 5749 ins_pipe( pipe_slow ); 5750 %} 5751 5752 // Shorts/Chars vector sub 5753 instruct vsubS(vec dst, vec src) %{ 5754 predicate(UseAVX == 0); 5755 match(Set dst (SubVS dst src)); 5756 format %{ "psubw $dst,$src\t! sub packedS" %} 5757 ins_encode %{ 5758 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5759 %} 5760 ins_pipe( pipe_slow ); 5761 %} 5762 5763 5764 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5765 predicate(UseAVX > 0); 5766 match(Set dst (SubVS src1 src2)); 5767 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5768 ins_encode %{ 5769 int vlen_enc = vector_length_encoding(this); 5770 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5771 %} 5772 ins_pipe( pipe_slow ); 5773 %} 5774 5775 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5776 predicate((UseAVX > 0) && 5777 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5778 match(Set dst (SubVS src (LoadVector mem))); 5779 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5780 ins_encode %{ 5781 int vlen_enc = vector_length_encoding(this); 5782 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5783 %} 5784 ins_pipe( pipe_slow ); 5785 %} 5786 5787 // Integers vector sub 5788 instruct vsubI(vec dst, vec src) %{ 5789 predicate(UseAVX == 0); 5790 match(Set dst (SubVI dst src)); 5791 format %{ "psubd $dst,$src\t! sub packedI" %} 5792 ins_encode %{ 5793 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5794 %} 5795 ins_pipe( pipe_slow ); 5796 %} 5797 5798 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5799 predicate(UseAVX > 0); 5800 match(Set dst (SubVI src1 src2)); 5801 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5802 ins_encode %{ 5803 int vlen_enc = vector_length_encoding(this); 5804 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5805 %} 5806 ins_pipe( pipe_slow ); 5807 %} 5808 5809 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5810 predicate((UseAVX > 0) && 5811 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5812 match(Set dst (SubVI src (LoadVector mem))); 5813 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5814 ins_encode %{ 5815 int vlen_enc = vector_length_encoding(this); 5816 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5817 %} 5818 ins_pipe( pipe_slow ); 5819 %} 5820 5821 // Longs vector sub 5822 instruct vsubL(vec dst, vec src) %{ 5823 predicate(UseAVX == 0); 5824 match(Set dst (SubVL dst src)); 5825 format %{ "psubq $dst,$src\t! sub packedL" %} 5826 ins_encode %{ 5827 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5828 %} 5829 ins_pipe( pipe_slow ); 5830 %} 5831 5832 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5833 predicate(UseAVX > 0); 5834 match(Set dst (SubVL src1 src2)); 5835 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5836 ins_encode %{ 5837 int vlen_enc = vector_length_encoding(this); 5838 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5839 %} 5840 ins_pipe( pipe_slow ); 5841 %} 5842 5843 5844 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5845 predicate((UseAVX > 0) && 5846 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5847 match(Set dst (SubVL src (LoadVector mem))); 5848 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5849 ins_encode %{ 5850 int vlen_enc = vector_length_encoding(this); 5851 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 // Floats vector sub 5857 instruct vsubF(vec dst, vec src) %{ 5858 predicate(UseAVX == 0); 5859 match(Set dst (SubVF dst src)); 5860 format %{ "subps $dst,$src\t! sub packedF" %} 5861 ins_encode %{ 5862 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5863 %} 5864 ins_pipe( pipe_slow ); 5865 %} 5866 5867 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5868 predicate(UseAVX > 0); 5869 match(Set dst (SubVF src1 src2)); 5870 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5871 ins_encode %{ 5872 int vlen_enc = vector_length_encoding(this); 5873 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5874 %} 5875 ins_pipe( pipe_slow ); 5876 %} 5877 5878 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5879 predicate((UseAVX > 0) && 5880 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5881 match(Set dst (SubVF src (LoadVector mem))); 5882 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5883 ins_encode %{ 5884 int vlen_enc = vector_length_encoding(this); 5885 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5886 %} 5887 ins_pipe( pipe_slow ); 5888 %} 5889 5890 // Doubles vector sub 5891 instruct vsubD(vec dst, vec src) %{ 5892 predicate(UseAVX == 0); 5893 match(Set dst (SubVD dst src)); 5894 format %{ "subpd $dst,$src\t! sub packedD" %} 5895 ins_encode %{ 5896 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5897 %} 5898 ins_pipe( pipe_slow ); 5899 %} 5900 5901 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5902 predicate(UseAVX > 0); 5903 match(Set dst (SubVD src1 src2)); 5904 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5905 ins_encode %{ 5906 int vlen_enc = vector_length_encoding(this); 5907 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5908 %} 5909 ins_pipe( pipe_slow ); 5910 %} 5911 5912 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5913 predicate((UseAVX > 0) && 5914 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5915 match(Set dst (SubVD src (LoadVector mem))); 5916 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5917 ins_encode %{ 5918 int vlen_enc = vector_length_encoding(this); 5919 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 // --------------------------------- MUL -------------------------------------- 5925 5926 // Byte vector mul 5927 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5928 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5929 match(Set dst (MulVB src1 src2)); 5930 effect(TEMP dst, TEMP xtmp); 5931 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5932 ins_encode %{ 5933 assert(UseSSE > 3, "required"); 5934 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5935 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5936 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5937 __ psllw($dst$$XMMRegister, 8); 5938 __ psrlw($dst$$XMMRegister, 8); 5939 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5940 %} 5941 ins_pipe( pipe_slow ); 5942 %} 5943 5944 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5945 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5946 match(Set dst (MulVB src1 src2)); 5947 effect(TEMP dst, TEMP xtmp); 5948 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5949 ins_encode %{ 5950 assert(UseSSE > 3, "required"); 5951 // Odd-index elements 5952 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5953 __ psrlw($dst$$XMMRegister, 8); 5954 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5955 __ psrlw($xtmp$$XMMRegister, 8); 5956 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5957 __ psllw($dst$$XMMRegister, 8); 5958 // Even-index elements 5959 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5960 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5961 __ psllw($xtmp$$XMMRegister, 8); 5962 __ psrlw($xtmp$$XMMRegister, 8); 5963 // Combine 5964 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5965 %} 5966 ins_pipe( pipe_slow ); 5967 %} 5968 5969 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5970 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5971 match(Set dst (MulVB src1 src2)); 5972 effect(TEMP xtmp1, TEMP xtmp2); 5973 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5974 ins_encode %{ 5975 int vlen_enc = vector_length_encoding(this); 5976 // Odd-index elements 5977 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5978 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5979 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5980 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5981 // Even-index elements 5982 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5983 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5984 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5985 // Combine 5986 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5987 %} 5988 ins_pipe( pipe_slow ); 5989 %} 5990 5991 // Shorts/Chars vector mul 5992 instruct vmulS(vec dst, vec src) %{ 5993 predicate(UseAVX == 0); 5994 match(Set dst (MulVS dst src)); 5995 format %{ "pmullw $dst,$src\t! mul packedS" %} 5996 ins_encode %{ 5997 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5998 %} 5999 ins_pipe( pipe_slow ); 6000 %} 6001 6002 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6003 predicate(UseAVX > 0); 6004 match(Set dst (MulVS src1 src2)); 6005 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6006 ins_encode %{ 6007 int vlen_enc = vector_length_encoding(this); 6008 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6014 predicate((UseAVX > 0) && 6015 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6016 match(Set dst (MulVS src (LoadVector mem))); 6017 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6018 ins_encode %{ 6019 int vlen_enc = vector_length_encoding(this); 6020 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 // Integers vector mul 6026 instruct vmulI(vec dst, vec src) %{ 6027 predicate(UseAVX == 0); 6028 match(Set dst (MulVI dst src)); 6029 format %{ "pmulld $dst,$src\t! mul packedI" %} 6030 ins_encode %{ 6031 assert(UseSSE > 3, "required"); 6032 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6033 %} 6034 ins_pipe( pipe_slow ); 6035 %} 6036 6037 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6038 predicate(UseAVX > 0); 6039 match(Set dst (MulVI src1 src2)); 6040 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6041 ins_encode %{ 6042 int vlen_enc = vector_length_encoding(this); 6043 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6044 %} 6045 ins_pipe( pipe_slow ); 6046 %} 6047 6048 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6049 predicate((UseAVX > 0) && 6050 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6051 match(Set dst (MulVI src (LoadVector mem))); 6052 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6053 ins_encode %{ 6054 int vlen_enc = vector_length_encoding(this); 6055 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6056 %} 6057 ins_pipe( pipe_slow ); 6058 %} 6059 6060 // Longs vector mul 6061 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6062 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6063 VM_Version::supports_avx512dq()) || 6064 VM_Version::supports_avx512vldq()); 6065 match(Set dst (MulVL src1 src2)); 6066 ins_cost(500); 6067 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6068 ins_encode %{ 6069 assert(UseAVX > 2, "required"); 6070 int vlen_enc = vector_length_encoding(this); 6071 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6072 %} 6073 ins_pipe( pipe_slow ); 6074 %} 6075 6076 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6077 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6078 VM_Version::supports_avx512dq()) || 6079 (Matcher::vector_length_in_bytes(n) > 8 && 6080 VM_Version::supports_avx512vldq())); 6081 match(Set dst (MulVL src (LoadVector mem))); 6082 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6083 ins_cost(500); 6084 ins_encode %{ 6085 assert(UseAVX > 2, "required"); 6086 int vlen_enc = vector_length_encoding(this); 6087 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6088 %} 6089 ins_pipe( pipe_slow ); 6090 %} 6091 6092 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6093 predicate(UseAVX == 0); 6094 match(Set dst (MulVL src1 src2)); 6095 ins_cost(500); 6096 effect(TEMP dst, TEMP xtmp); 6097 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6098 ins_encode %{ 6099 assert(VM_Version::supports_sse4_1(), "required"); 6100 // Get the lo-hi products, only the lower 32 bits is in concerns 6101 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6102 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6103 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6104 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6105 __ psllq($dst$$XMMRegister, 32); 6106 // Get the lo-lo products 6107 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6108 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6109 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6110 %} 6111 ins_pipe( pipe_slow ); 6112 %} 6113 6114 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6115 predicate(UseAVX > 0 && 6116 ((Matcher::vector_length_in_bytes(n) == 64 && 6117 !VM_Version::supports_avx512dq()) || 6118 (Matcher::vector_length_in_bytes(n) < 64 && 6119 !VM_Version::supports_avx512vldq()))); 6120 match(Set dst (MulVL src1 src2)); 6121 effect(TEMP xtmp1, TEMP xtmp2); 6122 ins_cost(500); 6123 format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6124 ins_encode %{ 6125 int vlen_enc = vector_length_encoding(this); 6126 // Get the lo-hi products, only the lower 32 bits is in concerns 6127 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6128 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6129 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6130 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6131 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6132 // Get the lo-lo products 6133 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6134 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6135 %} 6136 ins_pipe( pipe_slow ); 6137 %} 6138 6139 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{ 6140 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs()); 6141 match(Set dst (MulVL src1 src2)); 6142 ins_cost(100); 6143 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %} 6144 ins_encode %{ 6145 int vlen_enc = vector_length_encoding(this); 6146 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6147 %} 6148 ins_pipe( pipe_slow ); 6149 %} 6150 6151 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{ 6152 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs()); 6153 match(Set dst (MulVL src1 src2)); 6154 ins_cost(100); 6155 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %} 6156 ins_encode %{ 6157 int vlen_enc = vector_length_encoding(this); 6158 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6159 %} 6160 ins_pipe( pipe_slow ); 6161 %} 6162 6163 // Floats vector mul 6164 instruct vmulF(vec dst, vec src) %{ 6165 predicate(UseAVX == 0); 6166 match(Set dst (MulVF dst src)); 6167 format %{ "mulps $dst,$src\t! mul packedF" %} 6168 ins_encode %{ 6169 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6170 %} 6171 ins_pipe( pipe_slow ); 6172 %} 6173 6174 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6175 predicate(UseAVX > 0); 6176 match(Set dst (MulVF src1 src2)); 6177 format %{ "vmulps $dst,$src1,$src2\t! 
mul packedF" %} 6178 ins_encode %{ 6179 int vlen_enc = vector_length_encoding(this); 6180 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6181 %} 6182 ins_pipe( pipe_slow ); 6183 %} 6184 6185 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6186 predicate((UseAVX > 0) && 6187 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6188 match(Set dst (MulVF src (LoadVector mem))); 6189 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6190 ins_encode %{ 6191 int vlen_enc = vector_length_encoding(this); 6192 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6193 %} 6194 ins_pipe( pipe_slow ); 6195 %} 6196 6197 // Doubles vector mul 6198 instruct vmulD(vec dst, vec src) %{ 6199 predicate(UseAVX == 0); 6200 match(Set dst (MulVD dst src)); 6201 format %{ "mulpd $dst,$src\t! mul packedD" %} 6202 ins_encode %{ 6203 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6204 %} 6205 ins_pipe( pipe_slow ); 6206 %} 6207 6208 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6209 predicate(UseAVX > 0); 6210 match(Set dst (MulVD src1 src2)); 6211 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6212 ins_encode %{ 6213 int vlen_enc = vector_length_encoding(this); 6214 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6220 predicate((UseAVX > 0) && 6221 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6222 match(Set dst (MulVD src (LoadVector mem))); 6223 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6224 ins_encode %{ 6225 int vlen_enc = vector_length_encoding(this); 6226 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6227 %} 6228 ins_pipe( pipe_slow ); 6229 %} 6230 6231 // --------------------------------- DIV -------------------------------------- 6232 6233 // Floats vector div 6234 instruct vdivF(vec dst, vec src) %{ 6235 predicate(UseAVX == 0); 6236 match(Set dst (DivVF dst src)); 6237 format %{ "divps $dst,$src\t! div packedF" %} 6238 ins_encode %{ 6239 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6240 %} 6241 ins_pipe( pipe_slow ); 6242 %} 6243 6244 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6245 predicate(UseAVX > 0); 6246 match(Set dst (DivVF src1 src2)); 6247 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6248 ins_encode %{ 6249 int vlen_enc = vector_length_encoding(this); 6250 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6251 %} 6252 ins_pipe( pipe_slow ); 6253 %} 6254 6255 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6256 predicate((UseAVX > 0) && 6257 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6258 match(Set dst (DivVF src (LoadVector mem))); 6259 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6260 ins_encode %{ 6261 int vlen_enc = vector_length_encoding(this); 6262 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6263 %} 6264 ins_pipe( pipe_slow ); 6265 %} 6266 6267 // Doubles vector div 6268 instruct vdivD(vec dst, vec src) %{ 6269 predicate(UseAVX == 0); 6270 match(Set dst (DivVD dst src)); 6271 format %{ "divpd $dst,$src\t! div packedD" %} 6272 ins_encode %{ 6273 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6274 %} 6275 ins_pipe( pipe_slow ); 6276 %} 6277 6278 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6279 predicate(UseAVX > 0); 6280 match(Set dst (DivVD src1 src2)); 6281 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6282 ins_encode %{ 6283 int vlen_enc = vector_length_encoding(this); 6284 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6285 %} 6286 ins_pipe( pipe_slow ); 6287 %} 6288 6289 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6290 predicate((UseAVX > 0) && 6291 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6292 match(Set dst (DivVD src (LoadVector mem))); 6293 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6294 ins_encode %{ 6295 int vlen_enc = vector_length_encoding(this); 6296 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6297 %} 6298 ins_pipe( pipe_slow ); 6299 %} 6300 6301 // ------------------------------ MinMax --------------------------------------- 6302 6303 // Byte, Short, Int vector Min/Max 6304 instruct minmax_reg_sse(vec dst, vec src) %{ 6305 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6306 UseAVX == 0); 6307 match(Set dst (MinV dst src)); 6308 match(Set dst (MaxV dst src)); 6309 format %{ "vector_minmax $dst,$src\t! " %} 6310 ins_encode %{ 6311 assert(UseSSE >= 4, "required"); 6312 6313 int opcode = this->ideal_Opcode(); 6314 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6315 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6316 %} 6317 ins_pipe( pipe_slow ); 6318 %} 6319 6320 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6321 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6322 UseAVX > 0); 6323 match(Set dst (MinV src1 src2)); 6324 match(Set dst (MaxV src1 src2)); 6325 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6326 ins_encode %{ 6327 int opcode = this->ideal_Opcode(); 6328 int vlen_enc = vector_length_encoding(this); 6329 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6330 6331 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6332 %} 6333 ins_pipe( pipe_slow ); 6334 %} 6335 6336 // Long vector Min/Max 6337 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6338 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6339 UseAVX == 0); 6340 match(Set dst (MinV dst src)); 6341 match(Set dst (MaxV src dst)); 6342 effect(TEMP dst, TEMP tmp); 6343 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6344 ins_encode %{ 6345 assert(UseSSE >= 4, "required"); 6346 6347 int opcode = this->ideal_Opcode(); 6348 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6349 assert(elem_bt == T_LONG, "sanity"); 6350 6351 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6352 %} 6353 ins_pipe( pipe_slow ); 6354 %} 6355 6356 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6357 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6358 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6359 match(Set dst (MinV src1 src2)); 6360 match(Set dst (MaxV src1 src2)); 6361 effect(TEMP dst); 6362 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
" %} 6363 ins_encode %{ 6364 int vlen_enc = vector_length_encoding(this); 6365 int opcode = this->ideal_Opcode(); 6366 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6367 assert(elem_bt == T_LONG, "sanity"); 6368 6369 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6370 %} 6371 ins_pipe( pipe_slow ); 6372 %} 6373 6374 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6375 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6376 Matcher::vector_element_basic_type(n) == T_LONG); 6377 match(Set dst (MinV src1 src2)); 6378 match(Set dst (MaxV src1 src2)); 6379 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6380 ins_encode %{ 6381 assert(UseAVX > 2, "required"); 6382 6383 int vlen_enc = vector_length_encoding(this); 6384 int opcode = this->ideal_Opcode(); 6385 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6386 assert(elem_bt == T_LONG, "sanity"); 6387 6388 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6389 %} 6390 ins_pipe( pipe_slow ); 6391 %} 6392 6393 // Float/Double vector Min/Max 6394 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6395 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6396 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6397 UseAVX > 0); 6398 match(Set dst (MinV a b)); 6399 match(Set dst (MaxV a b)); 6400 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6401 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6402 ins_encode %{ 6403 assert(UseAVX > 0, "required"); 6404 6405 int opcode = this->ideal_Opcode(); 6406 int vlen_enc = vector_length_encoding(this); 6407 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6408 6409 __ vminmax_fp(opcode, elem_bt, 6410 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6411 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6417 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6418 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6419 match(Set dst (MinV a b)); 6420 match(Set dst (MaxV a b)); 6421 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6422 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6423 ins_encode %{ 6424 assert(UseAVX > 2, "required"); 6425 6426 int opcode = this->ideal_Opcode(); 6427 int vlen_enc = vector_length_encoding(this); 6428 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6429 6430 __ evminmax_fp(opcode, elem_bt, 6431 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6432 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 // ------------------------------ Unsigned vector Min/Max ---------------------- 6438 6439 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6440 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6441 match(Set dst (UMinV a b)); 6442 match(Set dst (UMaxV a b)); 6443 format %{ "vector_uminmax $dst,$a,$b\t!" 
%} 6444 ins_encode %{ 6445 int opcode = this->ideal_Opcode(); 6446 int vlen_enc = vector_length_encoding(this); 6447 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6448 assert(is_integral_type(elem_bt), ""); 6449 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6450 %} 6451 ins_pipe( pipe_slow ); 6452 %} 6453 6454 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6455 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6456 match(Set dst (UMinV a (LoadVector b))); 6457 match(Set dst (UMaxV a (LoadVector b))); 6458 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6459 ins_encode %{ 6460 int opcode = this->ideal_Opcode(); 6461 int vlen_enc = vector_length_encoding(this); 6462 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6463 assert(is_integral_type(elem_bt), ""); 6464 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6465 %} 6466 ins_pipe( pipe_slow ); 6467 %} 6468 6469 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6470 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6471 match(Set dst (UMinV a b)); 6472 match(Set dst (UMaxV a b)); 6473 effect(TEMP xtmp1, TEMP xtmp2); 6474 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6475 ins_encode %{ 6476 int opcode = this->ideal_Opcode(); 6477 int vlen_enc = vector_length_encoding(this); 6478 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6479 %} 6480 ins_pipe( pipe_slow ); 6481 %} 6482 6483 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6484 match(Set dst (UMinV (Binary dst src2) mask)); 6485 match(Set dst (UMaxV (Binary dst src2) mask)); 6486 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6487 ins_encode %{ 6488 int vlen_enc = vector_length_encoding(this); 6489 BasicType bt = Matcher::vector_element_basic_type(this); 6490 int opc = this->ideal_Opcode(); 6491 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6492 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6493 %} 6494 ins_pipe( pipe_slow ); 6495 %} 6496 6497 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6498 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6499 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6500 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %} 6501 ins_encode %{ 6502 int vlen_enc = vector_length_encoding(this); 6503 BasicType bt = Matcher::vector_element_basic_type(this); 6504 int opc = this->ideal_Opcode(); 6505 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6506 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6507 %} 6508 ins_pipe( pipe_slow ); 6509 %} 6510 6511 // --------------------------------- Signum/CopySign --------------------------- 6512 6513 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6514 match(Set dst (SignumF dst (Binary zero one))); 6515 effect(KILL cr); 6516 format %{ "signumF $dst, $dst" %} 6517 ins_encode %{ 6518 int opcode = this->ideal_Opcode(); 6519 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6520 %} 6521 ins_pipe( pipe_slow ); 6522 %} 6523 6524 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6525 match(Set dst (SignumD dst (Binary zero one))); 6526 effect(KILL cr); 6527 format %{ "signumD $dst, $dst" %} 6528 ins_encode %{ 6529 int opcode = this->ideal_Opcode(); 6530 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6531 %} 6532 ins_pipe( pipe_slow ); 6533 %} 6534 6535 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6536 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6537 match(Set dst (SignumVF src (Binary zero one))); 6538 match(Set dst (SignumVD src (Binary zero one))); 6539 effect(TEMP dst, TEMP xtmp1); 6540 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %} 6541 ins_encode %{ 6542 int opcode = this->ideal_Opcode(); 6543 int vec_enc = vector_length_encoding(this); 6544 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6545 $xtmp1$$XMMRegister, vec_enc); 6546 %} 6547 ins_pipe( pipe_slow ); 6548 %} 6549 6550 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6551 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6552 match(Set dst (SignumVF src (Binary zero one))); 6553 match(Set dst (SignumVD src (Binary zero one))); 6554 effect(TEMP dst, TEMP ktmp1); 6555 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6556 ins_encode %{ 6557 int opcode = this->ideal_Opcode(); 6558 int vec_enc = vector_length_encoding(this); 6559 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6560 $ktmp1$$KRegister, vec_enc); 6561 %} 6562 ins_pipe( pipe_slow ); 6563 %} 6564 6565 // --------------------------------------- 6566 // For copySign use 0xE4 as writemask for vpternlog 6567 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6568 // C (xmm2) is set to 0x7FFFFFFF 6569 // Wherever xmm2 is 0, we want to pick from B (sign) 6570 // Wherever xmm2 is 1, we want to pick from A (src) 6571 // 6572 // A B C Result 6573 // 0 0 0 0 6574 // 0 0 1 0 6575 // 0 1 0 1 6576 // 0 1 1 0 6577 // 1 0 0 0 6578 // 1 0 1 1 6579 // 1 1 0 1 6580 // 1 1 1 1 6581 // 6582 // Result going from high bit to low bit is 0x11100100 = 0xe4 6583 // --------------------------------------- 6584 6585 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6586 match(Set dst (CopySignF dst src)); 6587 effect(TEMP tmp1, TEMP tmp2); 6588 format %{ "CopySignF $dst, $src\t! 
using $tmp1 and $tmp2 as TEMP" %} 6589 ins_encode %{ 6590 __ movl($tmp2$$Register, 0x7FFFFFFF); 6591 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6592 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6593 %} 6594 ins_pipe( pipe_slow ); 6595 %} 6596 6597 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6598 match(Set dst (CopySignD dst (Binary src zero))); 6599 ins_cost(100); 6600 effect(TEMP tmp1, TEMP tmp2); 6601 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6602 ins_encode %{ 6603 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6604 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6605 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6606 %} 6607 ins_pipe( pipe_slow ); 6608 %} 6609 6610 //----------------------------- CompressBits/ExpandBits ------------------------ 6611 6612 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6613 predicate(n->bottom_type()->isa_int()); 6614 match(Set dst (CompressBits src mask)); 6615 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6616 ins_encode %{ 6617 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6618 %} 6619 ins_pipe( pipe_slow ); 6620 %} 6621 6622 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6623 predicate(n->bottom_type()->isa_int()); 6624 match(Set dst (ExpandBits src mask)); 6625 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6626 ins_encode %{ 6627 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6628 %} 6629 ins_pipe( pipe_slow ); 6630 %} 6631 6632 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6633 predicate(n->bottom_type()->isa_int()); 6634 match(Set dst (CompressBits src (LoadI mask))); 6635 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6636 ins_encode %{ 6637 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6638 %} 6639 ins_pipe( pipe_slow ); 6640 %} 6641 6642 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6643 predicate(n->bottom_type()->isa_int()); 6644 match(Set dst (ExpandBits src (LoadI mask))); 6645 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6646 ins_encode %{ 6647 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6648 %} 6649 ins_pipe( pipe_slow ); 6650 %} 6651 6652 // --------------------------------- Sqrt -------------------------------------- 6653 6654 instruct vsqrtF_reg(vec dst, vec src) %{ 6655 match(Set dst (SqrtVF src)); 6656 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6657 ins_encode %{ 6658 assert(UseAVX > 0, "required"); 6659 int vlen_enc = vector_length_encoding(this); 6660 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6661 %} 6662 ins_pipe( pipe_slow ); 6663 %} 6664 6665 instruct vsqrtF_mem(vec dst, memory mem) %{ 6666 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6667 match(Set dst (SqrtVF (LoadVector mem))); 6668 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6669 ins_encode %{ 6670 assert(UseAVX > 0, "required"); 6671 int vlen_enc = vector_length_encoding(this); 6672 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6673 %} 6674 ins_pipe( pipe_slow ); 6675 %} 6676 6677 // Floating point vector sqrt 6678 instruct vsqrtD_reg(vec dst, vec src) %{ 6679 match(Set dst (SqrtVD src)); 6680 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6681 ins_encode %{ 6682 assert(UseAVX > 0, "required"); 6683 int vlen_enc = vector_length_encoding(this); 6684 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6685 %} 6686 ins_pipe( pipe_slow ); 6687 %} 6688 6689 instruct vsqrtD_mem(vec dst, memory mem) %{ 6690 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6691 match(Set dst (SqrtVD (LoadVector mem))); 6692 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6693 ins_encode %{ 6694 assert(UseAVX > 0, "required"); 6695 int vlen_enc = vector_length_encoding(this); 6696 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6697 %} 6698 ins_pipe( pipe_slow ); 6699 %} 6700 6701 // ------------------------------ Shift --------------------------------------- 6702 6703 // Left and right shift count vectors are the same on x86 6704 // (only lowest bits of xmm reg are used for count). 6705 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6706 match(Set dst (LShiftCntV cnt)); 6707 match(Set dst (RShiftCntV cnt)); 6708 format %{ "movdl $dst,$cnt\t! load shift count" %} 6709 ins_encode %{ 6710 __ movdl($dst$$XMMRegister, $cnt$$Register); 6711 %} 6712 ins_pipe( pipe_slow ); 6713 %} 6714 6715 // Byte vector shift 6716 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6717 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6718 match(Set dst ( LShiftVB src shift)); 6719 match(Set dst ( RShiftVB src shift)); 6720 match(Set dst (URShiftVB src shift)); 6721 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6722 format %{"vector_byte_shift $dst,$src,$shift" %} 6723 ins_encode %{ 6724 assert(UseSSE > 3, "required"); 6725 int opcode = this->ideal_Opcode(); 6726 bool sign = (opcode != Op_URShiftVB); 6727 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6728 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6729 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6730 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6731 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6732 %} 6733 ins_pipe( pipe_slow ); 6734 %} 6735 6736 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6737 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6738 UseAVX <= 1); 6739 match(Set dst ( LShiftVB src shift)); 6740 match(Set dst ( RShiftVB src shift)); 6741 match(Set dst (URShiftVB src shift)); 6742 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6743 format %{"vector_byte_shift $dst,$src,$shift" %} 6744 ins_encode %{ 6745 assert(UseSSE > 3, "required"); 6746 int opcode = this->ideal_Opcode(); 6747 bool sign = (opcode != Op_URShiftVB); 6748 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6749 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6750 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6751 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6752 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6753 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6754 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6755 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6756 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6757 %} 6758 ins_pipe( pipe_slow ); 6759 %} 6760 6761 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6762 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6763 UseAVX > 1); 6764 match(Set dst ( LShiftVB src shift)); 6765 match(Set dst ( RShiftVB src shift)); 6766 match(Set 
dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int with
// sign extension before a shift. But char vectors are fine, since chars are
// unsigned values.
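// For example (illustrative values): for short s = (short)0xFFF8 (-8), Java evaluates
// s >>> 3 on the sign-extended int 0xFFFFFFF8 and gets 0x1FFFFFFF, whereas a 16-bit
// lane shift (psrlw) would produce 0x1FFF, so the two disagree. For char c = 0xFFF8
// the zero-extended int gives 0x1FFF, matching the lane shift.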
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 6904 ins_encode %{ 6905 int opcode = this->ideal_Opcode(); 6906 if (UseAVX > 0) { 6907 int vector_len = vector_length_encoding(this); 6908 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6909 } else { 6910 int vlen = Matcher::vector_length(this); 6911 if (vlen == 2) { 6912 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6913 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6914 } else { 6915 assert(vlen == 4, "sanity"); 6916 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6917 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6918 } 6919 } 6920 %} 6921 ins_pipe( pipe_slow ); 6922 %} 6923 6924 // Longs vector shift 6925 instruct vshiftL(vec dst, vec src, vec shift) %{ 6926 predicate(!n->as_ShiftV()->is_var_shift()); 6927 match(Set dst ( LShiftVL src shift)); 6928 match(Set dst (URShiftVL src shift)); 6929 effect(TEMP dst, USE src, USE shift); 6930 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6931 ins_encode %{ 6932 int opcode = this->ideal_Opcode(); 6933 if (UseAVX > 0) { 6934 int vlen_enc = vector_length_encoding(this); 6935 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6936 } else { 6937 assert(Matcher::vector_length(this) == 2, ""); 6938 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6939 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6940 } 6941 %} 6942 ins_pipe( pipe_slow ); 6943 %} 6944 6945 // Longs vector constant shift 6946 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6947 match(Set dst (LShiftVL src (LShiftCntV shift))); 6948 match(Set dst (URShiftVL src (RShiftCntV shift))); 6949 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6950 ins_encode %{ 6951 int opcode = this->ideal_Opcode(); 6952 if (UseAVX > 0) { 6953 int vector_len = vector_length_encoding(this); 6954 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6955 } else { 6956 assert(Matcher::vector_length(this) == 2, ""); 6957 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6958 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6959 } 6960 %} 6961 ins_pipe( pipe_slow ); 6962 %} 6963 6964 // -------------------ArithmeticRightShift ----------------------------------- 6965 // Long vector arithmetic right shift 6966 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6967 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6968 match(Set dst (RShiftVL src shift)); 6969 effect(TEMP dst, TEMP tmp); 6970 format %{ "vshiftq $dst,$src,$shift" %} 6971 ins_encode %{ 6972 uint vlen = Matcher::vector_length(this); 6973 if (vlen == 2) { 6974 assert(UseSSE >= 2, "required"); 6975 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6976 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6977 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6978 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6979 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6980 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6981 } else { 6982 assert(vlen == 4, "sanity"); 6983 assert(UseAVX > 1, "required"); 6984 int vlen_enc = Assembler::AVX_256bit; 6985 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6986 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6987 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6988 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6989 __ 
vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6990 } 6991 %} 6992 ins_pipe( pipe_slow ); 6993 %} 6994 6995 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6996 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6997 match(Set dst (RShiftVL src shift)); 6998 format %{ "vshiftq $dst,$src,$shift" %} 6999 ins_encode %{ 7000 int vlen_enc = vector_length_encoding(this); 7001 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7002 %} 7003 ins_pipe( pipe_slow ); 7004 %} 7005 7006 // ------------------- Variable Shift ----------------------------- 7007 // Byte variable shift 7008 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7009 predicate(Matcher::vector_length(n) <= 8 && 7010 n->as_ShiftV()->is_var_shift() && 7011 !VM_Version::supports_avx512bw()); 7012 match(Set dst ( LShiftVB src shift)); 7013 match(Set dst ( RShiftVB src shift)); 7014 match(Set dst (URShiftVB src shift)); 7015 effect(TEMP dst, TEMP vtmp); 7016 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7017 ins_encode %{ 7018 assert(UseAVX >= 2, "required"); 7019 7020 int opcode = this->ideal_Opcode(); 7021 int vlen_enc = Assembler::AVX_128bit; 7022 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7023 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7024 %} 7025 ins_pipe( pipe_slow ); 7026 %} 7027 7028 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7029 predicate(Matcher::vector_length(n) == 16 && 7030 n->as_ShiftV()->is_var_shift() && 7031 !VM_Version::supports_avx512bw()); 7032 match(Set dst ( LShiftVB src shift)); 7033 match(Set dst ( RShiftVB src shift)); 7034 match(Set dst (URShiftVB src shift)); 7035 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7036 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7037 ins_encode %{ 7038 assert(UseAVX >= 2, "required"); 7039 7040 int opcode = this->ideal_Opcode(); 7041 int vlen_enc = Assembler::AVX_128bit; 7042 // Shift lower half and get word result in dst 7043 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7044 7045 // Shift upper half and get word result in vtmp1 7046 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7047 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7048 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7049 7050 // Merge and down convert the two word results to byte in dst 7051 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7052 %} 7053 ins_pipe( pipe_slow ); 7054 %} 7055 7056 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7057 predicate(Matcher::vector_length(n) == 32 && 7058 n->as_ShiftV()->is_var_shift() && 7059 !VM_Version::supports_avx512bw()); 7060 match(Set dst ( LShiftVB src shift)); 7061 match(Set dst ( RShiftVB src shift)); 7062 match(Set dst (URShiftVB src shift)); 7063 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7064 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7065 ins_encode %{ 7066 assert(UseAVX >= 2, "required"); 7067 7068 int opcode = this->ideal_Opcode(); 7069 int vlen_enc = Assembler::AVX_128bit; 7070 // Process lower 128 bits and get result in dst 7071 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7072 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7073 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7074 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7075 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7076 7077 // Process higher 128 bits and get result in vtmp3 7078 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7079 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7080 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7081 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7082 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7083 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7084 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7085 7086 // Merge the two results in dst 7087 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7088 %} 7089 ins_pipe( pipe_slow ); 7090 %} 7091 7092 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7093 predicate(Matcher::vector_length(n) <= 32 && 7094 n->as_ShiftV()->is_var_shift() && 7095 VM_Version::supports_avx512bw()); 7096 match(Set dst ( LShiftVB src shift)); 7097 match(Set dst ( RShiftVB src shift)); 7098 match(Set dst (URShiftVB src shift)); 7099 effect(TEMP dst, TEMP vtmp); 7100 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7101 ins_encode %{ 7102 assert(UseAVX > 2, "required"); 7103 7104 int opcode = this->ideal_Opcode(); 7105 int vlen_enc = vector_length_encoding(this); 7106 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7107 %} 7108 ins_pipe( pipe_slow ); 7109 %} 7110 7111 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7112 predicate(Matcher::vector_length(n) == 64 && 7113 n->as_ShiftV()->is_var_shift() && 7114 VM_Version::supports_avx512bw()); 7115 match(Set dst ( LShiftVB src shift)); 7116 match(Set dst ( RShiftVB src shift)); 7117 match(Set dst (URShiftVB src shift)); 7118 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7119 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7120 ins_encode %{ 7121 assert(UseAVX > 2, "required"); 7122 7123 int opcode = this->ideal_Opcode(); 7124 int vlen_enc = Assembler::AVX_256bit; 7125 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7126 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7127 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7128 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7129 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7130 %} 7131 ins_pipe( pipe_slow ); 7132 %} 7133 7134 // Short variable shift 7135 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7136 predicate(Matcher::vector_length(n) <= 8 && 7137 n->as_ShiftV()->is_var_shift() && 7138 !VM_Version::supports_avx512bw()); 7139 match(Set dst ( LShiftVS src shift)); 7140 match(Set dst ( RShiftVS src shift)); 7141 match(Set dst (URShiftVS src shift)); 7142 effect(TEMP dst, TEMP vtmp); 7143 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7144 ins_encode %{ 7145 assert(UseAVX >= 2, "required"); 7146 7147 int opcode = this->ideal_Opcode(); 7148 bool sign = (opcode != Op_URShiftVS); 7149 int vlen_enc = Assembler::AVX_256bit; 7150 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7151 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7152 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7153 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7154 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7155 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7156 %} 7157 ins_pipe( pipe_slow ); 7158 %} 7159 7160 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7161 predicate(Matcher::vector_length(n) == 16 && 7162 n->as_ShiftV()->is_var_shift() && 7163 !VM_Version::supports_avx512bw()); 7164 match(Set dst ( LShiftVS src shift)); 7165 match(Set dst ( RShiftVS src shift)); 7166 match(Set dst (URShiftVS src shift)); 7167 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7168 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7169 ins_encode %{ 7170 assert(UseAVX >= 2, "required"); 7171 7172 int opcode = this->ideal_Opcode(); 7173 bool sign = (opcode != Op_URShiftVS); 7174 int vlen_enc = Assembler::AVX_256bit; 7175 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7176 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7177 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7178 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7179 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7180 7181 // Shift upper half, with result in dst using vtmp1 as TEMP 7182 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7183 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7184 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7185 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7186 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7187 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7188 7189 // Merge lower and upper half result into dst 7190 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7191 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7192 %} 7193 ins_pipe( pipe_slow ); 7194 %} 7195 7196 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7197 predicate(n->as_ShiftV()->is_var_shift() && 7198 VM_Version::supports_avx512bw()); 7199 match(Set dst ( LShiftVS src shift)); 7200 match(Set dst ( RShiftVS src shift)); 7201 match(Set dst (URShiftVS src shift)); 7202 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7203 ins_encode %{ 7204 assert(UseAVX > 2, "required"); 7205 7206 int opcode = this->ideal_Opcode(); 7207 int vlen_enc = vector_length_encoding(this); 7208 if (!VM_Version::supports_avx512vl()) { 7209 vlen_enc = Assembler::AVX_512bit; 7210 } 7211 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7212 %} 7213 ins_pipe( pipe_slow ); 7214 %} 7215 7216 //Integer variable shift 7217 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7218 predicate(n->as_ShiftV()->is_var_shift()); 7219 match(Set dst ( LShiftVI src shift)); 7220 match(Set dst ( RShiftVI src shift)); 7221 match(Set dst (URShiftVI src shift)); 7222 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7223 ins_encode %{ 7224 assert(UseAVX >= 2, "required"); 7225 7226 int opcode = this->ideal_Opcode(); 7227 int vlen_enc = vector_length_encoding(this); 7228 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7229 %} 7230 ins_pipe( pipe_slow ); 7231 %} 7232 7233 //Long variable shift 7234 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7235 predicate(n->as_ShiftV()->is_var_shift()); 7236 match(Set dst ( LShiftVL src shift)); 7237 match(Set dst (URShiftVL src shift)); 7238 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7239 ins_encode %{ 7240 assert(UseAVX >= 2, "required"); 7241 7242 int opcode = this->ideal_Opcode(); 7243 int vlen_enc = vector_length_encoding(this); 7244 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7245 %} 7246 ins_pipe( pipe_slow ); 7247 %} 7248 7249 //Long variable right shift arithmetic 7250 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7251 predicate(Matcher::vector_length(n) <= 4 && 7252 n->as_ShiftV()->is_var_shift() && 7253 UseAVX == 2); 7254 match(Set dst (RShiftVL src shift)); 7255 effect(TEMP dst, TEMP vtmp); 7256 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 7257 ins_encode %{ 7258 int opcode = this->ideal_Opcode(); 7259 int vlen_enc = vector_length_encoding(this); 7260 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7261 $vtmp$$XMMRegister); 7262 %} 7263 ins_pipe( pipe_slow ); 7264 %} 7265 7266 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7267 predicate(n->as_ShiftV()->is_var_shift() && 7268 UseAVX > 2); 7269 match(Set dst (RShiftVL src shift)); 7270 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7271 ins_encode %{ 7272 int opcode = this->ideal_Opcode(); 7273 int vlen_enc = vector_length_encoding(this); 7274 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7275 %} 7276 ins_pipe( pipe_slow ); 7277 %} 7278 7279 // --------------------------------- AND -------------------------------------- 7280 7281 instruct vand(vec dst, vec src) %{ 7282 predicate(UseAVX == 0); 7283 match(Set dst (AndV dst src)); 7284 format %{ "pand $dst,$src\t! and vectors" %} 7285 ins_encode %{ 7286 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7287 %} 7288 ins_pipe( pipe_slow ); 7289 %} 7290 7291 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7292 predicate(UseAVX > 0); 7293 match(Set dst (AndV src1 src2)); 7294 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7295 ins_encode %{ 7296 int vlen_enc = vector_length_encoding(this); 7297 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7298 %} 7299 ins_pipe( pipe_slow ); 7300 %} 7301 7302 instruct vand_mem(vec dst, vec src, memory mem) %{ 7303 predicate((UseAVX > 0) && 7304 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7305 match(Set dst (AndV src (LoadVector mem))); 7306 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7307 ins_encode %{ 7308 int vlen_enc = vector_length_encoding(this); 7309 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7310 %} 7311 ins_pipe( pipe_slow ); 7312 %} 7313 7314 // --------------------------------- OR --------------------------------------- 7315 7316 instruct vor(vec dst, vec src) %{ 7317 predicate(UseAVX == 0); 7318 match(Set dst (OrV dst src)); 7319 format %{ "por $dst,$src\t! or vectors" %} 7320 ins_encode %{ 7321 __ por($dst$$XMMRegister, $src$$XMMRegister); 7322 %} 7323 ins_pipe( pipe_slow ); 7324 %} 7325 7326 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7327 predicate(UseAVX > 0); 7328 match(Set dst (OrV src1 src2)); 7329 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7330 ins_encode %{ 7331 int vlen_enc = vector_length_encoding(this); 7332 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7333 %} 7334 ins_pipe( pipe_slow ); 7335 %} 7336 7337 instruct vor_mem(vec dst, vec src, memory mem) %{ 7338 predicate((UseAVX > 0) && 7339 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7340 match(Set dst (OrV src (LoadVector mem))); 7341 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7342 ins_encode %{ 7343 int vlen_enc = vector_length_encoding(this); 7344 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7345 %} 7346 ins_pipe( pipe_slow ); 7347 %} 7348 7349 // --------------------------------- XOR -------------------------------------- 7350 7351 instruct vxor(vec dst, vec src) %{ 7352 predicate(UseAVX == 0); 7353 match(Set dst (XorV dst src)); 7354 format %{ "pxor $dst,$src\t!
xor vectors" %} 7355 ins_encode %{ 7356 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7357 %} 7358 ins_pipe( pipe_slow ); 7359 %} 7360 7361 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7362 predicate(UseAVX > 0); 7363 match(Set dst (XorV src1 src2)); 7364 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7365 ins_encode %{ 7366 int vlen_enc = vector_length_encoding(this); 7367 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7368 %} 7369 ins_pipe( pipe_slow ); 7370 %} 7371 7372 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7373 predicate((UseAVX > 0) && 7374 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7375 match(Set dst (XorV src (LoadVector mem))); 7376 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7377 ins_encode %{ 7378 int vlen_enc = vector_length_encoding(this); 7379 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7380 %} 7381 ins_pipe( pipe_slow ); 7382 %} 7383 7384 // --------------------------------- VectorCast -------------------------------------- 7385 7386 instruct vcastBtoX(vec dst, vec src) %{ 7387 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7388 match(Set dst (VectorCastB2X src)); 7389 format %{ "vector_cast_b2x $dst,$src\t!" %} 7390 ins_encode %{ 7391 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7392 int vlen_enc = vector_length_encoding(this); 7393 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7394 %} 7395 ins_pipe( pipe_slow ); 7396 %} 7397 7398 instruct vcastBtoD(legVec dst, legVec src) %{ 7399 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7400 match(Set dst (VectorCastB2X src)); 7401 format %{ "vector_cast_b2x $dst,$src\t!" %} 7402 ins_encode %{ 7403 int vlen_enc = vector_length_encoding(this); 7404 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7405 %} 7406 ins_pipe( pipe_slow ); 7407 %} 7408 7409 instruct castStoX(vec dst, vec src) %{ 7410 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7411 Matcher::vector_length(n->in(1)) <= 8 && // src 7412 Matcher::vector_element_basic_type(n) == T_BYTE); 7413 match(Set dst (VectorCastS2X src)); 7414 format %{ "vector_cast_s2x $dst,$src" %} 7415 ins_encode %{ 7416 assert(UseAVX > 0, "required"); 7417 7418 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7419 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7420 %} 7421 ins_pipe( pipe_slow ); 7422 %} 7423 7424 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7425 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7426 Matcher::vector_length(n->in(1)) == 16 && // src 7427 Matcher::vector_element_basic_type(n) == T_BYTE); 7428 effect(TEMP dst, TEMP vtmp); 7429 match(Set dst (VectorCastS2X src)); 7430 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7431 ins_encode %{ 7432 assert(UseAVX > 0, "required"); 7433 7434 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7435 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7436 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7437 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7438 %} 7439 ins_pipe( pipe_slow ); 7440 %} 7441 7442 instruct vcastStoX_evex(vec dst, vec src) %{ 7443 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7444 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7445 match(Set dst (VectorCastS2X src)); 7446 format %{ "vector_cast_s2x $dst,$src\t!" %} 7447 ins_encode %{ 7448 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7449 int src_vlen_enc = vector_length_encoding(this, $src); 7450 int vlen_enc = vector_length_encoding(this); 7451 switch (to_elem_bt) { 7452 case T_BYTE: 7453 if (!VM_Version::supports_avx512vl()) { 7454 vlen_enc = Assembler::AVX_512bit; 7455 } 7456 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7457 break; 7458 case T_INT: 7459 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7460 break; 7461 case T_FLOAT: 7462 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7463 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7464 break; 7465 case T_LONG: 7466 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7467 break; 7468 case T_DOUBLE: { 7469 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7470 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7471 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7472 break; 7473 } 7474 default: 7475 ShouldNotReachHere(); 7476 } 7477 %} 7478 ins_pipe( pipe_slow ); 7479 %} 7480 7481 instruct castItoX(vec dst, vec src) %{ 7482 predicate(UseAVX <= 2 && 7483 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7484 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7485 match(Set dst (VectorCastI2X src)); 7486 format %{ "vector_cast_i2x $dst,$src" %} 7487 ins_encode %{ 7488 assert(UseAVX > 0, "required"); 7489 7490 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7491 int vlen_enc = vector_length_encoding(this, $src); 7492 7493 if (to_elem_bt == T_BYTE) { 7494 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7495 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7496 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7497 } else { 7498 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7499 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7500 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7501 } 7502 %} 7503 ins_pipe( pipe_slow ); 7504 %} 7505 7506 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7507 predicate(UseAVX <= 2 && 7508 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7509 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7510 match(Set dst (VectorCastI2X src)); 7511 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7512 effect(TEMP dst, TEMP vtmp); 7513 ins_encode %{ 7514 assert(UseAVX > 0, "required"); 7515 7516 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7517 int vlen_enc = vector_length_encoding(this, $src); 7518 7519 if (to_elem_bt == T_BYTE) { 7520 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7521 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7522 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7523 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7524 } else { 7525 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7526 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7527 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7528 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7529 } 7530 %} 7531 ins_pipe( pipe_slow ); 7532 %} 7533 7534 instruct vcastItoX_evex(vec dst, vec src) %{ 7535 predicate(UseAVX > 2 || 7536 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7537 match(Set dst (VectorCastI2X src)); 7538 format %{ "vector_cast_i2x $dst,$src\t!" %} 7539 ins_encode %{ 7540 assert(UseAVX > 0, "required"); 7541 7542 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7543 int src_vlen_enc = vector_length_encoding(this, $src); 7544 int dst_vlen_enc = vector_length_encoding(this); 7545 switch (dst_elem_bt) { 7546 case T_BYTE: 7547 if (!VM_Version::supports_avx512vl()) { 7548 src_vlen_enc = Assembler::AVX_512bit; 7549 } 7550 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7551 break; 7552 case T_SHORT: 7553 if (!VM_Version::supports_avx512vl()) { 7554 src_vlen_enc = Assembler::AVX_512bit; 7555 } 7556 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7557 break; 7558 case T_FLOAT: 7559 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7560 break; 7561 case T_LONG: 7562 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7563 break; 7564 case T_DOUBLE: 7565 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7566 break; 7567 default: 7568 ShouldNotReachHere(); 7569 } 7570 %} 7571 ins_pipe( pipe_slow ); 7572 %} 7573 7574 instruct vcastLtoBS(vec dst, vec src) %{ 7575 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7576 UseAVX <= 2); 7577 match(Set dst (VectorCastL2X src)); 7578 format %{ "vector_cast_l2x $dst,$src" %} 7579 ins_encode %{ 7580 assert(UseAVX > 0, "required"); 7581 7582 int vlen = Matcher::vector_length_in_bytes(this, $src); 7583 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7584 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
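// AVX2 has no vpmovq{b,w} narrowing instructions, so the long-to-byte/short cast is done
// manually below: gather the low dword of each long (vpshufd for a 128-bit source,
// vpermilps + vpermpd for a 256-bit source), mask to the destination element width, then
// pack down with vpackusdw (and vpackuswb for the byte case).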
ExternalAddress(vector_int_to_byte_mask()) 7585 : ExternalAddress(vector_int_to_short_mask()); 7586 if (vlen <= 16) { 7587 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7588 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7589 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7590 } else { 7591 assert(vlen <= 32, "required"); 7592 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7593 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7594 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7595 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7596 } 7597 if (to_elem_bt == T_BYTE) { 7598 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7599 } 7600 %} 7601 ins_pipe( pipe_slow ); 7602 %} 7603 7604 instruct vcastLtoX_evex(vec dst, vec src) %{ 7605 predicate(UseAVX > 2 || 7606 (Matcher::vector_element_basic_type(n) == T_INT || 7607 Matcher::vector_element_basic_type(n) == T_FLOAT || 7608 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7609 match(Set dst (VectorCastL2X src)); 7610 format %{ "vector_cast_l2x $dst,$src\t!" %} 7611 ins_encode %{ 7612 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7613 int vlen = Matcher::vector_length_in_bytes(this, $src); 7614 int vlen_enc = vector_length_encoding(this, $src); 7615 switch (to_elem_bt) { 7616 case T_BYTE: 7617 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7618 vlen_enc = Assembler::AVX_512bit; 7619 } 7620 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7621 break; 7622 case T_SHORT: 7623 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7624 vlen_enc = Assembler::AVX_512bit; 7625 } 7626 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7627 break; 7628 case T_INT: 7629 if (vlen == 8) { 7630 if ($dst$$XMMRegister != $src$$XMMRegister) { 7631 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7632 } 7633 } else if (vlen == 16) { 7634 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7635 } else if (vlen == 32) { 7636 if (UseAVX > 2) { 7637 if (!VM_Version::supports_avx512vl()) { 7638 vlen_enc = Assembler::AVX_512bit; 7639 } 7640 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7641 } else { 7642 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7643 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7644 } 7645 } else { // vlen == 64 7646 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7647 } 7648 break; 7649 case T_FLOAT: 7650 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7651 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7652 break; 7653 case T_DOUBLE: 7654 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7655 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7656 break; 7657 7658 default: assert(false, "%s", type2name(to_elem_bt)); 7659 } 7660 %} 7661 ins_pipe( pipe_slow ); 7662 %} 7663 7664 instruct vcastFtoD_reg(vec dst, vec src) %{ 7665 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7666 match(Set dst (VectorCastF2X src)); 7667 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7668 ins_encode %{ 7669 int vlen_enc = vector_length_encoding(this); 7670 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7671 %} 7672 ins_pipe( pipe_slow ); 7673 %} 7674 7675 7676 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7677 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7678 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7679 match(Set dst (VectorCastF2X src)); 7680 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7681 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7682 ins_encode %{ 7683 int vlen_enc = vector_length_encoding(this, $src); 7684 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7685 // JDK-8292878 removed the explicit scratch register that was previously needed to load 7686 // addresses wider than 32 bits for register-indirect addressing, since stub constants are 7687 // part of the code cache and ReservedCodeCacheSize is currently capped at 2G. 7688 // Targets are free to raise that limit, but a code cache larger than 2G is unrealistic in 7689 // practice. On the flip side, with the given cap we save a temporary register allocation, 7690 // which in the limiting case can prevent spilling in blocks with high register 7691 // pressure. 7692 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7693 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7694 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7695 %} 7696 ins_pipe( pipe_slow ); 7697 %} 7698 7699 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7700 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7701 is_integral_type(Matcher::vector_element_basic_type(n))); 7702 match(Set dst (VectorCastF2X src)); 7703 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7704 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7705 ins_encode %{ 7706 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7707 if (to_elem_bt == T_LONG) { 7708 int vlen_enc = vector_length_encoding(this); 7709 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7710 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7711 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7712 } else { 7713 int vlen_enc = vector_length_encoding(this, $src); 7714 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7715 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7716 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7717 } 7718 %} 7719 ins_pipe( pipe_slow ); 7720 %} 7721 7722 instruct vcastDtoF_reg(vec dst, vec src) %{ 7723 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7724 match(Set dst (VectorCastD2X src)); 7725 format %{ "vector_cast_d2x $dst,$src\t!"
%} 7726 ins_encode %{ 7727 int vlen_enc = vector_length_encoding(this, $src); 7728 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7729 %} 7730 ins_pipe( pipe_slow ); 7731 %} 7732 7733 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7734 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7735 is_integral_type(Matcher::vector_element_basic_type(n))); 7736 match(Set dst (VectorCastD2X src)); 7737 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7738 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7739 ins_encode %{ 7740 int vlen_enc = vector_length_encoding(this, $src); 7741 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7742 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7743 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7744 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7745 %} 7746 ins_pipe( pipe_slow ); 7747 %} 7748 7749 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7750 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7751 is_integral_type(Matcher::vector_element_basic_type(n))); 7752 match(Set dst (VectorCastD2X src)); 7753 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7754 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7755 ins_encode %{ 7756 int vlen_enc = vector_length_encoding(this, $src); 7757 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7758 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7759 ExternalAddress(vector_float_signflip()); 7760 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7761 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7762 %} 7763 ins_pipe( pipe_slow ); 7764 %} 7765 7766 instruct vucast(vec dst, vec src) %{ 7767 match(Set dst (VectorUCastB2X src)); 7768 match(Set dst (VectorUCastS2X src)); 7769 match(Set dst (VectorUCastI2X src)); 7770 format %{ "vector_ucast $dst,$src\t!" %} 7771 ins_encode %{ 7772 assert(UseAVX > 0, "required"); 7773 7774 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7775 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7776 int vlen_enc = vector_length_encoding(this); 7777 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7778 %} 7779 ins_pipe( pipe_slow ); 7780 %} 7781 7782 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7783 predicate(!VM_Version::supports_avx512vl() && 7784 Matcher::vector_length_in_bytes(n) < 64 && 7785 Matcher::vector_element_basic_type(n) == T_INT); 7786 match(Set dst (RoundVF src)); 7787 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7788 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7789 ins_encode %{ 7790 int vlen_enc = vector_length_encoding(this); 7791 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
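// Both MXCSR values keep all exceptions masked and select round-toward-negative-infinity
// (RC = 01 in bits 14:13); the E-core variant (0x3FBF) additionally pre-sets the six
// exception status flags (bits 5:0), presumably so later sticky-flag updates do not have
// to modify MXCSR again on E-cores.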
0x3FBF : 0x3F80)); 7792 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7793 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7794 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7795 %} 7796 ins_pipe( pipe_slow ); 7797 %} 7798 7799 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7800 predicate((VM_Version::supports_avx512vl() || 7801 Matcher::vector_length_in_bytes(n) == 64) && 7802 Matcher::vector_element_basic_type(n) == T_INT); 7803 match(Set dst (RoundVF src)); 7804 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7805 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7806 ins_encode %{ 7807 int vlen_enc = vector_length_encoding(this); 7808 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7809 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7810 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7811 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7812 %} 7813 ins_pipe( pipe_slow ); 7814 %} 7815 7816 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7817 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7818 match(Set dst (RoundVD src)); 7819 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7820 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7821 ins_encode %{ 7822 int vlen_enc = vector_length_encoding(this); 7823 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7824 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7825 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7826 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7827 %} 7828 ins_pipe( pipe_slow ); 7829 %} 7830 7831 // --------------------------------- VectorMaskCmp -------------------------------------- 7832 7833 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7834 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7835 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7836 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7837 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7838 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7839 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7840 ins_encode %{ 7841 int vlen_enc = vector_length_encoding(this, $src1); 7842 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7843 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7844 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7845 } else { 7846 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7847 } 7848 %} 7849 ins_pipe( pipe_slow ); 7850 %} 7851 7852 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7853 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7854 n->bottom_type()->isa_vectmask() == nullptr && 7855 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7856 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7857 effect(TEMP ktmp); 7858 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7859 ins_encode %{ 7860 int vlen_enc = Assembler::AVX_512bit; 7861 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7862 KRegister mask = k0; // The comparison itself is not being masked. 7863 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7864 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7865 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7866 } else { 7867 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7868 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7869 } 7870 %} 7871 ins_pipe( pipe_slow ); 7872 %} 7873 7874 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7875 predicate(n->bottom_type()->isa_vectmask() && 7876 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7877 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7878 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7879 ins_encode %{ 7880 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7881 int vlen_enc = vector_length_encoding(this, $src1); 7882 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7883 KRegister mask = k0; // The comparison itself is not being masked. 7884 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7885 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7886 } else { 7887 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7888 } 7889 %} 7890 ins_pipe( pipe_slow ); 7891 %} 7892 7893 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7894 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7895 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7896 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7897 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7898 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7899 (n->in(2)->get_int() == BoolTest::eq || 7900 n->in(2)->get_int() == BoolTest::lt || 7901 n->in(2)->get_int() == BoolTest::gt)); // cond 7902 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7903 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7904 ins_encode %{ 7905 int vlen_enc = vector_length_encoding(this, $src1); 7906 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7907 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7908 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7909 %} 7910 ins_pipe( pipe_slow ); 7911 %} 7912 7913 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7914 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7915 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7916 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7917 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7918 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7919 (n->in(2)->get_int() == BoolTest::ne || 7920 n->in(2)->get_int() == BoolTest::le || 7921 n->in(2)->get_int() == BoolTest::ge)); // cond 7922 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7923 effect(TEMP dst, TEMP xtmp); 7924 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7925 ins_encode %{ 7926 int vlen_enc = vector_length_encoding(this, $src1); 7927 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7928 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7929 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7930 %} 7931 ins_pipe( pipe_slow ); 7932 %} 7933 7934 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7935 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7936 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7937 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7938 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7939 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7940 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7941 effect(TEMP dst, TEMP xtmp); 7942 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7943 ins_encode %{ 7944 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7945 int vlen_enc = vector_length_encoding(this, $src1); 7946 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7947 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7948 7949 if (vlen_enc == Assembler::AVX_128bit) { 7950 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7951 } else { 7952 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7953 } 7954 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7955 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7956 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7957 %} 7958 ins_pipe( pipe_slow ); 7959 %} 7960 7961 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7962 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7963 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7964 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7965 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7966 effect(TEMP ktmp); 7967 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7968 ins_encode %{ 7969 assert(UseAVX > 2, "required"); 7970 7971 int vlen_enc = vector_length_encoding(this, $src1); 7972 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7973 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7974 KRegister mask = k0; // The comparison itself is not being masked. 7975 bool merge = false; 7976 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7977 7978 switch (src1_elem_bt) { 7979 case T_INT: { 7980 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7981 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7982 break; 7983 } 7984 case T_LONG: { 7985 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7986 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7987 break; 7988 } 7989 default: assert(false, "%s", type2name(src1_elem_bt)); 7990 } 7991 %} 7992 ins_pipe( pipe_slow ); 7993 %} 7994 7995 7996 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7997 predicate(n->bottom_type()->isa_vectmask() && 7998 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7999 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8000 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%} 8001 ins_encode %{ 8002 assert(UseAVX > 2, "required"); 8003 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8004 8005 int vlen_enc = vector_length_encoding(this, $src1); 8006 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8007 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8008 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8009 8010 // Comparison is dispatched on the element type of src1. 8011 switch (src1_elem_bt) { 8012 case T_BYTE: { 8013 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8014 break; 8015 } 8016 case T_SHORT: { 8017 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8018 break; 8019 } 8020 case T_INT: { 8021 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8022 break; 8023 } 8024 case T_LONG: { 8025 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8026 break; 8027 } 8028 default: assert(false, "%s", type2name(src1_elem_bt)); 8029 } 8030 %} 8031 ins_pipe( pipe_slow ); 8032 %} 8033 8034 // Extract 8035 8036 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8037 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8038 match(Set dst (ExtractI src idx)); 8039 match(Set dst (ExtractS src idx)); 8040 match(Set dst (ExtractB src idx)); 8041 format %{ "extractI $dst,$src,$idx\t!" %} 8042 ins_encode %{ 8043 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8044 8045 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8046 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8047 %} 8048 ins_pipe( pipe_slow ); 8049 %} 8050 8051 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8052 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8053 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8054 match(Set dst (ExtractI src idx)); 8055 match(Set dst (ExtractS src idx)); 8056 match(Set dst (ExtractB src idx)); 8057 effect(TEMP vtmp); 8058 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8059 ins_encode %{ 8060 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8061 8062 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8063 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8064 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8065 %} 8066 ins_pipe( pipe_slow ); 8067 %} 8068 8069 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8070 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8071 match(Set dst (ExtractL src idx)); 8072 format %{ "extractL $dst,$src,$idx\t!" %} 8073 ins_encode %{ 8074 assert(UseSSE >= 4, "required"); 8075 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8076 8077 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8078 %} 8079 ins_pipe( pipe_slow ); 8080 %} 8081 8082 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8083 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8084 Matcher::vector_length(n->in(1)) == 8); // src 8085 match(Set dst (ExtractL src idx)); 8086 effect(TEMP vtmp); 8087 format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 8088 ins_encode %{ 8089 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8090 8091 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8092 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8093 %} 8094 ins_pipe( pipe_slow ); 8095 %} 8096 8097 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8098 predicate(Matcher::vector_length(n->in(1)) <= 4); 8099 match(Set dst (ExtractF src idx)); 8100 effect(TEMP dst, TEMP vtmp); 8101 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8102 ins_encode %{ 8103 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8104 8105 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8106 %} 8107 ins_pipe( pipe_slow ); 8108 %} 8109 8110 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8111 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8112 Matcher::vector_length(n->in(1)/*src*/) == 16); 8113 match(Set dst (ExtractF src idx)); 8114 effect(TEMP vtmp); 8115 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8116 ins_encode %{ 8117 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8118 8119 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8120 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8121 %} 8122 ins_pipe( pipe_slow ); 8123 %} 8124 8125 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8126 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8127 match(Set dst (ExtractD src idx)); 8128 format %{ "extractD $dst,$src,$idx\t!" %} 8129 ins_encode %{ 8130 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8131 8132 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8133 %} 8134 ins_pipe( pipe_slow ); 8135 %} 8136 8137 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8138 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8139 Matcher::vector_length(n->in(1)) == 8); // src 8140 match(Set dst (ExtractD src idx)); 8141 effect(TEMP vtmp); 8142 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8143 ins_encode %{ 8144 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8145 8146 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8147 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8148 %} 8149 ins_pipe( pipe_slow ); 8150 %} 8151 8152 // --------------------------------- Vector Blend -------------------------------------- 8153 8154 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8155 predicate(UseAVX == 0); 8156 match(Set dst (VectorBlend (Binary dst src) mask)); 8157 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8158 effect(TEMP tmp); 8159 ins_encode %{ 8160 assert(UseSSE >= 4, "required"); 8161 8162 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8163 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8164 } 8165 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8166 %} 8167 ins_pipe( pipe_slow ); 8168 %} 8169 8170 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8171 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8172 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8173 Matcher::vector_length_in_bytes(n) <= 32 && 8174 is_integral_type(Matcher::vector_element_basic_type(n))); 8175 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8176 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8177 ins_encode %{ 8178 int vlen_enc = vector_length_encoding(this); 8179 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8180 %} 8181 ins_pipe( pipe_slow ); 8182 %} 8183 8184 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8185 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8186 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8187 Matcher::vector_length_in_bytes(n) <= 32 && 8188 !is_integral_type(Matcher::vector_element_basic_type(n))); 8189 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8190 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8191 ins_encode %{ 8192 int vlen_enc = vector_length_encoding(this); 8193 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8194 %} 8195 ins_pipe( pipe_slow ); 8196 %} 8197 8198 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8199 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8200 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8201 Matcher::vector_length_in_bytes(n) <= 32); 8202 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8203 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8204 effect(TEMP vtmp, TEMP dst); 8205 ins_encode %{ 8206 int vlen_enc = vector_length_encoding(this); 8207 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8208 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8209 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8215 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8216 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8217 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8218 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8219 effect(TEMP ktmp); 8220 ins_encode %{ 8221 int vlen_enc = Assembler::AVX_512bit; 8222 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8223 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8224 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8225 %} 8226 ins_pipe( pipe_slow ); 8227 %} 8228 8229 8230 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8231 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8232 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8233 VM_Version::supports_avx512bw())); 8234 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8235 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8236 ins_encode %{ 8237 int vlen_enc = vector_length_encoding(this); 8238 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8239 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8240 %} 8241 ins_pipe( pipe_slow ); 8242 %} 8243 8244 // --------------------------------- ABS -------------------------------------- 8245 // a = |a| 8246 instruct vabsB_reg(vec dst, vec src) %{ 8247 match(Set dst (AbsVB src)); 8248 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8249 ins_encode %{ 8250 uint vlen = Matcher::vector_length(this); 8251 if (vlen <= 16) { 8252 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8253 } else { 8254 int vlen_enc = vector_length_encoding(this); 8255 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8256 } 8257 %} 8258 ins_pipe( pipe_slow ); 8259 %} 8260 8261 instruct vabsS_reg(vec dst, vec src) %{ 8262 match(Set dst (AbsVS src)); 8263 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8264 ins_encode %{ 8265 uint vlen = Matcher::vector_length(this); 8266 if (vlen <= 8) { 8267 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8268 } else { 8269 int vlen_enc = vector_length_encoding(this); 8270 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8271 } 8272 %} 8273 ins_pipe( pipe_slow ); 8274 %} 8275 8276 instruct vabsI_reg(vec dst, vec src) %{ 8277 match(Set dst (AbsVI src)); 8278 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8279 ins_encode %{ 8280 uint vlen = Matcher::vector_length(this); 8281 if (vlen <= 4) { 8282 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8283 } else { 8284 int vlen_enc = vector_length_encoding(this); 8285 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8286 } 8287 %} 8288 ins_pipe( pipe_slow ); 8289 %} 8290 8291 instruct vabsL_reg(vec dst, vec src) %{ 8292 match(Set dst (AbsVL src)); 8293 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8294 ins_encode %{ 8295 assert(UseAVX > 2, "required"); 8296 int vlen_enc = vector_length_encoding(this); 8297 if (!VM_Version::supports_avx512vl()) { 8298 vlen_enc = Assembler::AVX_512bit; 8299 } 8300 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8301 %} 8302 ins_pipe( pipe_slow ); 8303 %} 8304 8305 // --------------------------------- ABSNEG -------------------------------------- 8306 8307 instruct vabsnegF(vec dst, vec src) %{ 8308 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8309 match(Set dst (AbsVF src)); 8310 match(Set dst (NegVF src)); 8311 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8312 ins_cost(150); 8313 ins_encode %{ 8314 int opcode = 
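// AbsVF and NegVF share this pattern: the ideal opcode fetched here tells vabsnegf()
// whether to AND the lanes with a sign-clearing mask (abs) or XOR them with a
// sign-flipping mask (neg).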
this->ideal_Opcode(); 8315 int vlen = Matcher::vector_length(this); 8316 if (vlen == 2) { 8317 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8318 } else { 8319 assert(vlen == 8 || vlen == 16, "required"); 8320 int vlen_enc = vector_length_encoding(this); 8321 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8322 } 8323 %} 8324 ins_pipe( pipe_slow ); 8325 %} 8326 8327 instruct vabsneg4F(vec dst) %{ 8328 predicate(Matcher::vector_length(n) == 4); 8329 match(Set dst (AbsVF dst)); 8330 match(Set dst (NegVF dst)); 8331 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8332 ins_cost(150); 8333 ins_encode %{ 8334 int opcode = this->ideal_Opcode(); 8335 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8336 %} 8337 ins_pipe( pipe_slow ); 8338 %} 8339 8340 instruct vabsnegD(vec dst, vec src) %{ 8341 match(Set dst (AbsVD src)); 8342 match(Set dst (NegVD src)); 8343 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8344 ins_encode %{ 8345 int opcode = this->ideal_Opcode(); 8346 uint vlen = Matcher::vector_length(this); 8347 if (vlen == 2) { 8348 assert(UseSSE >= 2, "required"); 8349 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8350 } else { 8351 int vlen_enc = vector_length_encoding(this); 8352 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8353 } 8354 %} 8355 ins_pipe( pipe_slow ); 8356 %} 8357 8358 //------------------------------------- VectorTest -------------------------------------------- 8359 8360 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8361 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8362 match(Set cr (VectorTest src1 src2)); 8363 effect(TEMP vtmp); 8364 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8365 ins_encode %{ 8366 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8367 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8368 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8369 %} 8370 ins_pipe( pipe_slow ); 8371 %} 8372 8373 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8374 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8375 match(Set cr (VectorTest src1 src2)); 8376 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8377 ins_encode %{ 8378 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8379 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8380 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8381 %} 8382 ins_pipe( pipe_slow ); 8383 %} 8384 8385 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8386 predicate((Matcher::vector_length(n->in(1)) < 8 || 8387 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8388 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8389 match(Set cr (VectorTest src1 src2)); 8390 effect(TEMP tmp); 8391 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8392 ins_encode %{ 8393 uint masklen = Matcher::vector_length(this, $src1); 8394 __ kmovwl($tmp$$Register, $src1$$KRegister); 8395 __ andl($tmp$$Register, (1 << masklen) - 1); 8396 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8397 %} 8398 ins_pipe( pipe_slow ); 8399 %} 8400 8401 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8402 predicate((Matcher::vector_length(n->in(1)) < 8 || 8403 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8404 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8405 match(Set cr (VectorTest src1 src2)); 8406 effect(TEMP tmp); 8407 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8408 ins_encode %{ 8409 uint masklen = Matcher::vector_length(this, $src1); 8410 __ kmovwl($tmp$$Register, $src1$$KRegister); 8411 __ andl($tmp$$Register, (1 << masklen) - 1); 8412 %} 8413 ins_pipe( pipe_slow ); 8414 %} 8415 8416 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8417 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8418 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8419 match(Set cr (VectorTest src1 src2)); 8420 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8421 ins_encode %{ 8422 uint masklen = Matcher::vector_length(this, $src1); 8423 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8424 %} 8425 ins_pipe( pipe_slow ); 8426 %} 8427 8428 //------------------------------------- LoadMask -------------------------------------------- 8429 8430 instruct loadMask(legVec dst, legVec src) %{ 8431 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8432 match(Set dst (VectorLoadMask src)); 8433 effect(TEMP dst); 8434 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8435 ins_encode %{ 8436 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8437 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8438 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8439 %} 8440 ins_pipe( pipe_slow ); 8441 %} 8442 8443 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8444 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8445 match(Set dst (VectorLoadMask src)); 8446 effect(TEMP xtmp); 8447 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8448 ins_encode %{ 8449 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8450 true, Assembler::AVX_512bit); 8451 %} 8452 ins_pipe( pipe_slow ); 8453 %} 8454 8455 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8456 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8457 match(Set dst (VectorLoadMask src)); 8458 effect(TEMP xtmp); 8459 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8460 ins_encode %{ 8461 int vlen_enc = vector_length_encoding(in(1)); 8462 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8463 false, vlen_enc); 8464 %} 8465 ins_pipe( pipe_slow ); 8466 %} 8467 8468 //------------------------------------- StoreMask -------------------------------------------- 8469 8470 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8471 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8472 match(Set dst (VectorStoreMask src size)); 8473 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8474 ins_encode %{ 8475 int vlen = Matcher::vector_length(this); 8476 if (vlen <= 16 && UseAVX <= 2) { 8477 assert(UseSSE >= 3, "required"); 8478 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8479 } else { 8480 assert(UseAVX > 0, "required"); 8481 int src_vlen_enc = vector_length_encoding(this, $src); 8482 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8483 } 8484 %} 8485 ins_pipe( pipe_slow ); 8486 %} 8487 8488 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8489 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8490 match(Set dst (VectorStoreMask src size)); 8491 effect(TEMP_DEF dst, TEMP xtmp); 8492 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8493 ins_encode %{ 8494 int vlen_enc = Assembler::AVX_128bit; 8495 int vlen = Matcher::vector_length(this); 8496 if (vlen <= 8) { 8497 assert(UseSSE >= 3, "required"); 8498 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8499 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8500 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8501 } else { 8502 assert(UseAVX > 0, "required"); 8503 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8504 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8505 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8506 } 8507 %} 8508 ins_pipe( pipe_slow ); 8509 %} 8510 8511 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8512 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8513 match(Set dst (VectorStoreMask src size)); 8514 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8515 effect(TEMP_DEF dst, TEMP xtmp); 8516 ins_encode %{ 8517 int vlen_enc = Assembler::AVX_128bit; 8518 int vlen = Matcher::vector_length(this); 8519 if (vlen <= 4) { 8520 assert(UseSSE >= 3, "required"); 8521 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8522 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8523 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8524 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8525 } else { 8526 assert(UseAVX > 0, "required"); 8527 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8528 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8529 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8530 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8531 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8532 } 8533 %} 8534 ins_pipe( pipe_slow ); 8535 %} 8536 8537 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8538 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8539 match(Set dst (VectorStoreMask src size)); 8540 effect(TEMP_DEF dst, TEMP xtmp); 8541 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8542 ins_encode %{ 8543 assert(UseSSE >= 3, "required"); 8544 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8545 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8546 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8547 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8548 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8549 %} 8550 ins_pipe( pipe_slow ); 8551 %} 8552 8553 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8554 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8555 match(Set dst (VectorStoreMask src size)); 8556 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8557 effect(TEMP_DEF dst, TEMP vtmp); 8558 ins_encode %{ 8559 int vlen_enc = Assembler::AVX_128bit; 8560 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8561 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8562 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8563 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8564 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8565 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8566 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8567 %} 8568 ins_pipe( pipe_slow ); 8569 %} 8570 8571 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8572 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8573 match(Set dst (VectorStoreMask src size)); 8574 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8575 ins_encode %{ 8576 int src_vlen_enc = vector_length_encoding(this, $src); 8577 int dst_vlen_enc = vector_length_encoding(this); 8578 if (!VM_Version::supports_avx512vl()) { 8579 src_vlen_enc = Assembler::AVX_512bit; 8580 } 8581 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8582 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8583 %} 8584 ins_pipe( pipe_slow ); 8585 %} 8586 8587 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8588 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8589 match(Set dst (VectorStoreMask src size)); 8590 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8591 ins_encode %{ 8592 int src_vlen_enc = vector_length_encoding(this, $src); 8593 int dst_vlen_enc = vector_length_encoding(this); 8594 if (!VM_Version::supports_avx512vl()) { 8595 src_vlen_enc = Assembler::AVX_512bit; 8596 } 8597 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8598 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8599 %} 8600 ins_pipe( pipe_slow ); 8601 %} 8602 8603 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8604 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8605 match(Set dst (VectorStoreMask mask size)); 8606 effect(TEMP_DEF dst); 8607 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8608 ins_encode %{ 8609 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8610 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8611 false, Assembler::AVX_512bit, noreg); 8612 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8618 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8619 match(Set dst (VectorStoreMask mask size)); 8620 effect(TEMP_DEF dst); 8621 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8622 ins_encode %{ 8623 int dst_vlen_enc = vector_length_encoding(this); 8624 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8625 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8626 %} 8627 ins_pipe( pipe_slow ); 8628 %} 8629 8630 instruct vmaskcast_evex(kReg dst) %{ 8631 match(Set dst (VectorMaskCast dst)); 8632 ins_cost(0); 8633 format %{ "vector_mask_cast $dst" %} 8634 ins_encode %{ 8635 // empty 8636 %} 8637 ins_pipe(empty); 8638 %} 8639 8640 instruct vmaskcast(vec dst) %{ 8641 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8642 match(Set dst (VectorMaskCast dst)); 8643 ins_cost(0); 8644 format %{ "vector_mask_cast $dst" %} 8645 ins_encode %{ 8646 // empty 8647 %} 8648 ins_pipe(empty); 8649 %} 8650 8651 instruct vmaskcast_avx(vec dst, vec src) %{ 8652 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8653 match(Set dst (VectorMaskCast src)); 8654 format %{ "vector_mask_cast $dst, $src" %} 8655 ins_encode %{ 8656 int vlen = Matcher::vector_length(this); 8657 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8658 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8659 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8660 %} 8661 ins_pipe(pipe_slow); 8662 %} 8663 8664 //-------------------------------- Load Iota Indices ---------------------------------- 8665 8666 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8667 match(Set dst (VectorLoadConst src)); 8668 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8669 ins_encode %{ 8670 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8671 BasicType bt = Matcher::vector_element_basic_type(this); 8672 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8673 %} 8674 ins_pipe( pipe_slow ); 8675 %} 8676 8677 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8678 match(Set dst (PopulateIndex src1 src2)); 8679 effect(TEMP dst, TEMP vtmp); 8680 format %{ "vector_populate_index $dst $src1 $src2\t! 
                 using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
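
// For example, with an int vector of 8 lanes and $src1 = n, the PopulateIndex
// rules above broadcast n, load the iota constant [0, 1, 2, ..., 7] and add the
// two, leaving $dst = [n, n+1, ..., n+7] ($src2, the stride, is restricted to 1).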

//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
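
// vpshufb can only pick bytes from within its own 128-bit lane, so for 32-byte
// vectors without AVX512_VBMI the rule below shuffles both the original source
// and a lane-swapped copy of it and then blends the two results; with VBMI,
// vpermb (see rearrangeB_evex_vbmi further down) does a full cross-lane byte
// permute directly.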

instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
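
// Example: a short shuffle [2, 0, 3, 1] is expanded to the byte shuffle
// [4,5, 0,1, 6,7, 2,3] - each index is doubled, duplicated into both byte
// positions of its lane, and then offset by 0 and 1.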

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask,
    // since only a byte shuffle instruction is available on these platforms.

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
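
// Example: an int shuffle [1, 0, 3, 2] is expanded to the byte shuffle
// [4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11] - each index is multiplied by 4,
// replicated into all four byte positions of its lane and offset by 0..3.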

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask,
    // since only a double word shuffle instruction is available on these platforms.

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}
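
// Example: a long shuffle [1, 0] is expanded to the doubleword shuffle
// [2,3, 0,1] - each index is doubled, duplicated, and offset by 0 and 1,
// ready for the vpermd in rearrangeL below.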

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
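
// The *_mem variants in this section fold the LoadVector of $b straight into the
// FMA instruction. Example: for packed doubles a = [1.0, 2.0], b = [3.0, 4.0] and
// c = [10.0, 20.0], every rule here leaves c = a * b + c = [13.0, 28.0].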

instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
match(Set dst (PopCountVI src)); 9106 match(Set dst (PopCountVL src)); 9107 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9108 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9109 ins_encode %{ 9110 int opcode = this->ideal_Opcode(); 9111 int vlen_enc = vector_length_encoding(this, $src); 9112 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9113 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9114 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9115 %} 9116 ins_pipe( pipe_slow ); 9117 %} 9118 9119 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9120 9121 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9122 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9123 Matcher::vector_length_in_bytes(n->in(1)))); 9124 match(Set dst (CountTrailingZerosV src)); 9125 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9126 ins_cost(400); 9127 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9128 ins_encode %{ 9129 int vlen_enc = vector_length_encoding(this, $src); 9130 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9131 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9132 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9133 %} 9134 ins_pipe( pipe_slow ); 9135 %} 9136 9137 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9138 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9139 VM_Version::supports_avx512cd() && 9140 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9141 match(Set dst (CountTrailingZerosV src)); 9142 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9143 ins_cost(400); 9144 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9145 ins_encode %{ 9146 int vlen_enc = vector_length_encoding(this, $src); 9147 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9148 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9149 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9150 %} 9151 ins_pipe( pipe_slow ); 9152 %} 9153 9154 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9155 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9156 match(Set dst (CountTrailingZerosV src)); 9157 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9158 ins_cost(400); 9159 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9160 ins_encode %{ 9161 int vlen_enc = vector_length_encoding(this, $src); 9162 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9163 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9164 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9165 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9166 %} 9167 ins_pipe( pipe_slow ); 9168 %} 9169 9170 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9171 
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
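
// The 8-bit $func immediate is the truth table of the three inputs: for each bit
// position the result is func[(dst_bit << 2) | (src2_bit << 1) | src3_bit].
// For example, func == 0x96 computes dst ^ src2 ^ src3 and func == 0xE8 computes
// the bitwise majority of the three operands.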

// --------------------------------- Rotation Operations ----------------------------------
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t!
vector mask comparison" %} 9308 ins_encode %{ 9309 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9310 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9311 9312 Label DONE; 9313 int vlen_enc = vector_length_encoding(this, $src1); 9314 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9315 9316 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9317 __ mov64($dst$$Register, -1L); 9318 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9319 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9320 __ jccb(Assembler::carrySet, DONE); 9321 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9322 __ notq($dst$$Register); 9323 __ tzcntq($dst$$Register, $dst$$Register); 9324 __ bind(DONE); 9325 %} 9326 ins_pipe( pipe_slow ); 9327 %} 9328 9329 9330 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9331 match(Set dst (VectorMaskGen len)); 9332 effect(TEMP temp, KILL cr); 9333 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9334 ins_encode %{ 9335 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9336 %} 9337 ins_pipe( pipe_slow ); 9338 %} 9339 9340 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9341 match(Set dst (VectorMaskGen len)); 9342 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9343 effect(TEMP temp); 9344 ins_encode %{ 9345 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9346 __ kmovql($dst$$KRegister, $temp$$Register); 9347 %} 9348 ins_pipe( pipe_slow ); 9349 %} 9350 9351 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9352 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9353 match(Set dst (VectorMaskToLong mask)); 9354 effect(TEMP dst, KILL cr); 9355 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9356 ins_encode %{ 9357 int opcode = this->ideal_Opcode(); 9358 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9359 int mask_len = Matcher::vector_length(this, $mask); 9360 int mask_size = mask_len * type2aelembytes(mbt); 9361 int vlen_enc = vector_length_encoding(this, $mask); 9362 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9363 $dst$$Register, mask_len, mask_size, vlen_enc); 9364 %} 9365 ins_pipe( pipe_slow ); 9366 %} 9367 9368 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9369 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9370 match(Set dst (VectorMaskToLong mask)); 9371 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9372 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9373 ins_encode %{ 9374 int opcode = this->ideal_Opcode(); 9375 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9376 int mask_len = Matcher::vector_length(this, $mask); 9377 int vlen_enc = vector_length_encoding(this, $mask); 9378 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9379 $dst$$Register, mask_len, mbt, vlen_enc); 9380 %} 9381 ins_pipe( pipe_slow ); 9382 %} 9383 9384 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9385 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9386 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9387 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9388 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9389 ins_encode %{ 9390 int opcode = this->ideal_Opcode(); 9391 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9392 int mask_len = Matcher::vector_length(this, $mask); 9393 int vlen_enc = vector_length_encoding(this, $mask); 9394 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9395 $dst$$Register, mask_len, mbt, vlen_enc); 9396 %} 9397 ins_pipe( pipe_slow ); 9398 %} 9399 9400 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9401 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9402 match(Set dst (VectorMaskTrueCount mask)); 9403 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9404 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9405 ins_encode %{ 9406 int opcode = this->ideal_Opcode(); 9407 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9408 int mask_len = Matcher::vector_length(this, $mask); 9409 int mask_size = mask_len * type2aelembytes(mbt); 9410 int vlen_enc = vector_length_encoding(this, $mask); 9411 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9412 $tmp$$Register, mask_len, mask_size, vlen_enc); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9418 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9419 match(Set dst (VectorMaskTrueCount mask)); 9420 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9421 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9422 ins_encode %{ 9423 int opcode = this->ideal_Opcode(); 9424 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9425 int mask_len = Matcher::vector_length(this, $mask); 9426 int vlen_enc = vector_length_encoding(this, $mask); 9427 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9428 $tmp$$Register, mask_len, mbt, vlen_enc); 9429 %} 9430 ins_pipe( pipe_slow ); 9431 %} 9432 9433 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9434 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9435 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9436 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9437 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9438 ins_encode %{ 9439 int opcode = this->ideal_Opcode(); 9440 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9441 int mask_len = Matcher::vector_length(this, $mask); 9442 int vlen_enc = vector_length_encoding(this, $mask); 9443 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9444 $tmp$$Register, mask_len, mbt, vlen_enc); 9445 %} 9446 ins_pipe( pipe_slow ); 9447 %} 9448 9449 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9450 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9451 match(Set dst (VectorMaskFirstTrue mask)); 9452 match(Set dst (VectorMaskLastTrue mask)); 9453 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9454 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9455 ins_encode %{ 9456 int opcode = this->ideal_Opcode(); 9457 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9458 int mask_len = Matcher::vector_length(this, $mask); 9459 int mask_size = mask_len * type2aelembytes(mbt); 9460 int vlen_enc = vector_length_encoding(this, $mask); 9461 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9462 $tmp$$Register, mask_len, mask_size, vlen_enc); 9463 %} 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9468 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9469 match(Set dst (VectorMaskFirstTrue mask)); 9470 match(Set dst (VectorMaskLastTrue mask)); 9471 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9472 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9473 ins_encode %{ 9474 int opcode = this->ideal_Opcode(); 9475 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9476 int mask_len = Matcher::vector_length(this, $mask); 9477 int vlen_enc = vector_length_encoding(this, $mask); 9478 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9479 $tmp$$Register, mask_len, mbt, vlen_enc); 9480 %} 9481 ins_pipe( pipe_slow ); 9482 %} 9483 9484 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9485 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9486 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9487 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9488 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9489 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9490 ins_encode %{ 9491 int opcode = this->ideal_Opcode(); 9492 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9493 int mask_len = Matcher::vector_length(this, $mask); 9494 int vlen_enc = vector_length_encoding(this, $mask); 9495 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9496 $tmp$$Register, mask_len, mbt, vlen_enc); 9497 %} 9498 ins_pipe( pipe_slow ); 9499 %} 9500 9501 // --------------------------------- Compress/Expand Operations --------------------------- 9502 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9503 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9504 match(Set dst (CompressV src mask)); 9505 match(Set dst (ExpandV src mask)); 9506 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9507 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9508 ins_encode %{ 9509 int opcode = this->ideal_Opcode(); 9510 int vlen_enc = vector_length_encoding(this); 9511 BasicType bt = Matcher::vector_element_basic_type(this); 9512 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9513 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9514 %} 9515 ins_pipe( pipe_slow ); 9516 %} 9517 9518 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9519 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9520 match(Set dst (CompressV src mask)); 9521 match(Set dst (ExpandV src mask)); 9522 format %{ "vector_compress_expand $dst, $src, $mask" %} 9523 ins_encode %{ 9524 int opcode = this->ideal_Opcode(); 9525 int vector_len = vector_length_encoding(this); 9526 BasicType bt = Matcher::vector_element_basic_type(this); 9527 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9528 %} 9529 ins_pipe( pipe_slow ); 9530 %} 9531 9532 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9533 match(Set dst (CompressM mask)); 9534 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9535 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9536 ins_encode %{ 9537 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9538 int mask_len = Matcher::vector_length(this); 9539 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9540 %} 9541 ins_pipe( pipe_slow ); 9542 %} 9543 9544 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9545 9546 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9547 predicate(!VM_Version::supports_gfni()); 9548 match(Set dst (ReverseV src)); 9549 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9550 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9551 ins_encode %{ 9552 int vec_enc = vector_length_encoding(this); 9553 BasicType bt = Matcher::vector_element_basic_type(this); 9554 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9555 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9556 %} 9557 ins_pipe( pipe_slow ); 9558 %} 9559 9560 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9561 predicate(VM_Version::supports_gfni()); 9562 match(Set dst (ReverseV src)); 9563 effect(TEMP dst, TEMP xtmp); 9564 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9565 ins_encode %{ 9566 int vec_enc = vector_length_encoding(this); 9567 BasicType bt = Matcher::vector_element_basic_type(this); 9568 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9569 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9570 $xtmp$$XMMRegister); 9571 %} 9572 ins_pipe( pipe_slow ); 9573 %} 9574 9575 instruct vreverse_byte_reg(vec dst, vec src) %{ 9576 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9577 match(Set dst (ReverseBytesV src)); 9578 effect(TEMP dst); 9579 format %{ "vector_reverse_byte $dst, $src" %} 9580 ins_encode %{ 9581 int vec_enc = vector_length_encoding(this); 9582 BasicType bt = Matcher::vector_element_basic_type(this); 9583 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9584 %} 9585 ins_pipe( pipe_slow ); 9586 %} 9587 9588 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9589 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9590 match(Set dst (ReverseBytesV src)); 9591 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9592 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9593 ins_encode %{ 9594 int vec_enc = vector_length_encoding(this); 9595 BasicType bt = Matcher::vector_element_basic_type(this); 9596 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9597 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9598 %} 9599 ins_pipe( pipe_slow ); 9600 %} 9601 9602 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9603 9604 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9605 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9606 Matcher::vector_length_in_bytes(n->in(1)))); 9607 match(Set dst (CountLeadingZerosV src)); 9608 format %{ "vector_count_leading_zeros $dst, $src" %} 9609 ins_encode %{ 9610 int vlen_enc = vector_length_encoding(this, $src); 9611 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9612 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, 
$src$$XMMRegister, xnoreg, 9613 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9614 %} 9615 ins_pipe( pipe_slow ); 9616 %} 9617 9618 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9619 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9620 Matcher::vector_length_in_bytes(n->in(1)))); 9621 match(Set dst (CountLeadingZerosV src mask)); 9622 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9623 ins_encode %{ 9624 int vlen_enc = vector_length_encoding(this, $src); 9625 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9626 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9627 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9628 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9629 %} 9630 ins_pipe( pipe_slow ); 9631 %} 9632 9633 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9634 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9635 VM_Version::supports_avx512cd() && 9636 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9637 match(Set dst (CountLeadingZerosV src)); 9638 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9639 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9640 ins_encode %{ 9641 int vlen_enc = vector_length_encoding(this, $src); 9642 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9643 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9644 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9650 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9651 match(Set dst (CountLeadingZerosV src)); 9652 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9653 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9654 ins_encode %{ 9655 int vlen_enc = vector_length_encoding(this, $src); 9656 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9657 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9658 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9659 $rtmp$$Register, true, vlen_enc); 9660 %} 9661 ins_pipe( pipe_slow ); 9662 %} 9663 9664 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9665 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9666 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9667 match(Set dst (CountLeadingZerosV src)); 9668 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9669 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9670 ins_encode %{ 9671 int vlen_enc = vector_length_encoding(this, $src); 9672 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9673 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9674 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9675 %} 9676 ins_pipe( pipe_slow ); 9677 %} 9678 9679 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9680 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9681 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9682 match(Set dst (CountLeadingZerosV src)); 9683 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9684 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9685 ins_encode %{ 9686 int vlen_enc = vector_length_encoding(this, $src); 9687 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9688 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9689 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9690 %} 9691 ins_pipe( pipe_slow ); 9692 %} 9693 9694 // ---------------------------------- Vector Masked Operations ------------------------------------ 9695 9696 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9697 match(Set dst (AddVB (Binary dst src2) mask)); 9698 match(Set dst (AddVS (Binary dst src2) mask)); 9699 match(Set dst (AddVI (Binary dst src2) mask)); 9700 match(Set dst (AddVL (Binary dst src2) mask)); 9701 match(Set dst (AddVF (Binary dst src2) mask)); 9702 match(Set dst (AddVD (Binary dst src2) mask)); 9703 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9704 ins_encode %{ 9705 int vlen_enc = vector_length_encoding(this); 9706 BasicType bt = Matcher::vector_element_basic_type(this); 9707 int opc = this->ideal_Opcode(); 9708 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9709 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9710 %} 9711 ins_pipe( pipe_slow ); 9712 %} 9713 9714 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9715 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9716 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9717 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9718 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9719 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9720 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9721 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9722 ins_encode %{ 9723 int vlen_enc = vector_length_encoding(this); 9724 BasicType bt = Matcher::vector_element_basic_type(this); 9725 int opc = this->ideal_Opcode(); 9726 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9727 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9728 %} 9729 ins_pipe( pipe_slow ); 9730 %} 9731 9732 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9733 match(Set dst (XorV (Binary dst src2) mask)); 9734 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9735 ins_encode %{ 9736 int vlen_enc = vector_length_encoding(this); 9737 BasicType bt = Matcher::vector_element_basic_type(this); 9738 int opc = this->ideal_Opcode(); 9739 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9740 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9741 %} 9742 ins_pipe( pipe_slow ); 9743 %} 9744 9745 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9746 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9747 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9748 ins_encode %{ 9749 int vlen_enc = vector_length_encoding(this); 9750 BasicType bt = Matcher::vector_element_basic_type(this); 9751 int opc = this->ideal_Opcode(); 9752 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9753 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9754 %} 9755 ins_pipe( pipe_slow ); 9756 %} 9757 9758 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9759 match(Set dst (OrV (Binary dst src2) mask)); 9760 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9761 ins_encode %{ 9762 int vlen_enc = vector_length_encoding(this); 9763 BasicType bt = Matcher::vector_element_basic_type(this); 9764 int opc = this->ideal_Opcode(); 9765 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9766 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9767 %} 9768 ins_pipe( pipe_slow ); 9769 %} 9770 9771 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9772 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9773 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9774 ins_encode %{ 9775 int vlen_enc = vector_length_encoding(this); 9776 BasicType bt = Matcher::vector_element_basic_type(this); 9777 int opc = this->ideal_Opcode(); 9778 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9779 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9780 %} 9781 ins_pipe( pipe_slow ); 9782 %} 9783 9784 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9785 match(Set dst (AndV (Binary dst src2) mask)); 9786 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9787 ins_encode %{ 9788 int vlen_enc = vector_length_encoding(this); 9789 BasicType bt = Matcher::vector_element_basic_type(this); 9790 int opc = this->ideal_Opcode(); 9791 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9792 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9793 %} 9794 ins_pipe( pipe_slow ); 9795 %} 9796 9797 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9798 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9799 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9800 ins_encode %{ 9801 int vlen_enc = vector_length_encoding(this); 9802 BasicType bt = Matcher::vector_element_basic_type(this); 9803 int opc = this->ideal_Opcode(); 9804 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9805 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9806 %} 9807 ins_pipe( pipe_slow ); 9808 %} 9809 9810 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9811 match(Set dst (SubVB (Binary dst src2) mask)); 9812 match(Set dst (SubVS (Binary dst src2) mask)); 9813 match(Set dst (SubVI (Binary dst src2) mask)); 9814 match(Set dst (SubVL (Binary dst src2) mask)); 9815 match(Set dst (SubVF (Binary dst src2) mask)); 9816 match(Set dst (SubVD (Binary dst src2) mask)); 9817 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
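// The masked shift rules below come in three flavours per direction: an immediate count
// (LShiftCntV/RShiftCntV feeding the shift), a broadcast register count
// (is_var_shift() == false) and a per-lane variable count (is_var_shift() == true).
// The extra trailing boolean handed to evmasked_op in the register forms mirrors that
// predicate and is assumed here to select the variable-shift (vpsllvd-style) encoding.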

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
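// The masked FMA rules below follow the same evmasked_op scheme as the rules above.
// The masked compare that comes after them writes its result into an opmask register:
// the BoolTest condition constant is translated via booltest_pred_to_comparison_pred
// (integer lanes) or booltest_pred_to_comparison_pred_fp (FP lanes), and for the integer
// cases the signedness of the comparison is derived from is_unsigned_booltest_pred.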

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
"%s", type2name(src1_elem_bt)); break; 10248 } 10249 %} 10250 ins_pipe( pipe_slow ); 10251 %} 10252 10253 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10254 predicate(Matcher::vector_length(n) <= 32); 10255 match(Set dst (MaskAll src)); 10256 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10257 ins_encode %{ 10258 int mask_len = Matcher::vector_length(this); 10259 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10260 %} 10261 ins_pipe( pipe_slow ); 10262 %} 10263 10264 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10265 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10266 match(Set dst (XorVMask src (MaskAll cnt))); 10267 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10268 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10269 ins_encode %{ 10270 uint masklen = Matcher::vector_length(this); 10271 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10272 %} 10273 ins_pipe( pipe_slow ); 10274 %} 10275 10276 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10277 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10278 (Matcher::vector_length(n) == 16) || 10279 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10280 match(Set dst (XorVMask src (MaskAll cnt))); 10281 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10282 ins_encode %{ 10283 uint masklen = Matcher::vector_length(this); 10284 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10285 %} 10286 ins_pipe( pipe_slow ); 10287 %} 10288 10289 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10290 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10291 match(Set dst (VectorLongToMask src)); 10292 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10293 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10294 ins_encode %{ 10295 int mask_len = Matcher::vector_length(this); 10296 int vec_enc = vector_length_encoding(mask_len); 10297 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10298 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10299 %} 10300 ins_pipe( pipe_slow ); 10301 %} 10302 10303 10304 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10305 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10306 match(Set dst (VectorLongToMask src)); 10307 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10308 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10309 ins_encode %{ 10310 int mask_len = Matcher::vector_length(this); 10311 assert(mask_len <= 32, "invalid mask length"); 10312 int vec_enc = vector_length_encoding(mask_len); 10313 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10314 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10315 %} 10316 ins_pipe( pipe_slow ); 10317 %} 10318 10319 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10320 predicate(n->bottom_type()->isa_vectmask()); 10321 match(Set dst (VectorLongToMask src)); 10322 format %{ "long_to_mask_evex $dst, $src\t!" 

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
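// MacroLogicV is C2's three-input boolean-function node; $func is the 8-bit vpternlog
// truth-table immediate, where bit i of the immediate gives the result for the i-th
// combination of the three source bits (for example 0x96 computes A ^ B ^ C). The two
// masked forms here differ only in whether the third operand comes from a register or
// from memory.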

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
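// Saturating add/sub on byte/short lanes maps directly onto the packed saturating
// instructions (the vector_saturating_op rules above). For int/long lanes x86 has no such
// instruction, so the *_evex and *_avx rules that follow synthesize the clamping with
// temporaries: opmask registers on AVX-512(VL) capable targets, plain XMM temporaries
// otherwise. The predicates keep the two code paths mutually exclusive.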

instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct scalar_minmax_HF_reg(regF dst, regF src1, regF src2, kReg ktmp, regF xtmp1, regF xtmp2)
%{
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  effect(TEMP_DEF dst, TEMP ktmp, TEMP xtmp1, TEMP xtmp2);
  format %{ "scalar_min_max_fp16 $dst, $src1, $src2\t using $ktmp, $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ scalar_max_min_fp16(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $ktmp$$KRegister,
                           $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
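// The scalar half-precision rules above (reinterpret via vmovw, the fused
// convert+reinterpret pairs via vcvtps2ph/vcvtph2ps, and the sqrt/arithmetic/min-max/fma
// forms via vsqrtsh, efp16sh, scalar_max_min_fp16 and vfmadd132sh) assume a target with
// the corresponding half-float support; vmovw, vsqrtsh and vfmadd132sh in particular are
// AVX512-FP16 instructions, and the enabling feature checks are presumed to live in the
// matcher's match_rule_supported logic rather than in per-rule predicates here.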