1 // 2 // Copyright (c) 2011, 2025, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, 
Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, 
SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( 
SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def 
XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, 
xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, 
xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, 
SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, 
xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, 
SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 
435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, 
xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, 
SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, 
SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, 
XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, 
XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, 
K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double 
registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 
XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, 
XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, 
XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, 
XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, 
XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, 
XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 
//
// To keep related declarations/definitions/uses close together,
// we switch between source %{ }% and source_hpp %{ }% freely as needed.

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

// Emits and sizes the C2 exception- and deopt-handler stubs; the size_*
// results are also credited to the code section size in output.cpp.
class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization.  (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

// Map a vector length in bytes to the Assembler's AVX vector-length
// encoding attribute. Lengths of 4 and 8 bytes still use the 128-bit
// encoding; any other length is a caller bug (ShouldNotReachHere).
inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

// Vector-length encoding for node n itself (via the Matcher's byte length).
static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

// Vector-length encoding of the node that defines operand opnd of use.
static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

// True if a vector popcount for basic type bt is supported by the CPU:
// subword (byte/short) types need AVX512_BITALG, int/long need
// AVX512_VPOPCNTDQ.
static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

// True if an EVEX count-leading-zeros on int/long vectors is usable:
// requires AVX512CD, plus AVX512VL unless the vector is full 512 bits.
static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

// Platform-dependent node flags, continuing the shared Node flag space
// from Node::_last_flag. Flag_intel_jcc_erratum marks machnodes that may
// need erratum padding (see compute_padding below); the sets_*/clears_*
// flags record, per the flag names, an instruction's effect on the
// condition-flag register bits (carry/parity/zero/overflow/sign).
class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
    Flag_clears_zero_flag     = Node::_last_flag << 9,
    Flag_clears_overflow_flag = Node::_last_flag << 10,
    Flag_clears_sign_flag     = Node::_last_flag << 11,
    _last_flag                = Flag_clears_sign_flag
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

// If the CPU is affected by the Intel JCC erratum, tag the affected
// machnodes and grow the code-buffer size estimate by the worst-case
// padding the mitigation may insert.
void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

// Alignment (in bytes) this node's start may need; branches affected by
// the JCC erratum reserve worst-case room, everything else needs none.
int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;
  } else {
    return 1;
  }
}

// Number of padding bytes to emit before this node at current_offset.
// Non-zero only for nodes tagged Flag_intel_jcc_erratum during
// pd_perform_mach_node_analysis.
int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_exception_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  // The stub must fit in the space promised by size_exception_handler().
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  address base = __ start_a_stub(size_deopt_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0;  // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 
// On 64-bit, sign/flip masks live in generated stub code; on 32-bit they
// are locally pooled constants.
#ifdef _LP64
static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
#else
static address float_signmask()  { return (address)float_signmask_pool; }
static address float_signflip()  { return (address)float_signflip_pool; }
static address double_signmask() { return (address)double_signmask_pool; }
static address double_signflip() { return (address)double_signflip_pool; }
#endif
// Addresses of vector constant tables provided by StubRoutines; used as
// memory operands by the vector instruct encodings in this file.
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();}
static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();}

//=============================================================================
// Refine ADLC's has_match_rule() with runtime CPU-feature checks: an
// opcode is supported only if a match rule exists AND the UseSSE/UseAVX
// level (and specific AVX-512 sub-features) required by its encodings is
// available. Opcodes not listed in the switch are supported by default.
bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (VM_Version::supports_avx512dq() == false) {
        return false;
      }
      break;
    case Op_AddReductionVL:
      if (UseSSE < 2) { // requires at least SSE2
        return false;
      }
      break;
    case Op_AbsVB:
    case Op_AbsVS:
    case Op_AbsVI:
    case Op_AddReductionVI:
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (UseSSE < 3) { // requires at least SSSE3
        return false;
      }
      break;
    // Scalar half-float ops need the AVX512-FP16 extension.
    case Op_AddHF:
    case Op_DivHF:
    case Op_FmaHF:
    case Op_MaxHF:
    case Op_MinHF:
    case Op_MulHF:
    case Op_ReinterpretS2HF:
    case Op_ReinterpretHF2S:
    case Op_SubHF:
    case Op_SqrtHF:
      if (!VM_Version::supports_avx512_fp16()) {
        return false;
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
    case Op_MulReductionVI:
      if (UseSSE < 4) { // requires at least SSE4
        return false;
      }
      break;
    case Op_IsInfiniteF:
    case Op_IsInfiniteD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_SqrtVD:
    case Op_SqrtVF:
    case Op_VectorMaskCmp:
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
    case Op_VectorCastL2X:
    case Op_VectorCastF2X:
    case Op_VectorCastD2X:
    case Op_VectorUCastB2X:
    case Op_VectorUCastS2X:
    case Op_VectorUCastI2X:
    case Op_VectorMaskCast:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
    case Op_PopulateIndex:
      if (!is_LP64 || (UseAVX < 2)) {
        return false;
      }
      break;
    case Op_RoundVF:
      if (UseAVX < 2) { // enabled for AVX2 only
        return false;
      }
      break;
    case Op_RoundVD:
      if (UseAVX < 3) {
        return false;  // enabled for AVX3 only
      }
      break;
    case Op_CompareAndSwapL:
#ifdef _LP64
    case Op_CompareAndSwapP:
#endif
      break;
    case Op_StrIndexOf:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_StrIndexOfChar:
      if (!UseSSE42Intrinsics) {
        return false;
      }
      break;
    case Op_OnSpinWait:
      if (VM_Version::supports_on_spin_wait() == false) {
        return false;
      }
      break;
    case Op_MulVB:
    case Op_LShiftVB:
    case Op_RShiftVB:
    case Op_URShiftVB:
    case Op_VectorInsert:
    case Op_VectorLoadMask:
    case Op_VectorStoreMask:
    case Op_VectorBlend:
      if (UseSSE < 4) {
        return false;
      }
      break;
#ifdef _LP64
    case Op_MaxD:
    case Op_MaxF:
    case Op_MinD:
    case Op_MinF:
      if (UseAVX < 1) { // enabled for AVX only
        return false;
      }
      break;
#endif
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        return false;
      }
      break;
    case Op_ExtractB:
    case Op_ExtractL:
    case Op_ExtractI:
    case Op_RoundDoubleMode:
      if (UseSSE < 4) {
        return false;
      }
      break;
    case Op_RoundDoubleModeV:
      if (VM_Version::supports_avx() == false) {
        return false; // 128bit vroundpd is not available
      }
      break;
    case Op_LoadVectorGather:
    case Op_LoadVectorGatherMasked:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_FmaF:
    case Op_FmaD:
    case Op_FmaVD:
    case Op_FmaVF:
      if (!UseFMA) {
        return false;
      }
      break;
    case Op_MacroLogicV:
      if (UseAVX < 3 || !UseVectorMacroLogic) {
        return false;
      }
      break;

    case Op_VectorCmpMasked:
    case Op_VectorMaskGen:
      if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_VectorMaskFirstTrue:
    case Op_VectorMaskLastTrue:
    case Op_VectorMaskTrueCount:
    case Op_VectorMaskToLong:
      if (!is_LP64 || UseAVX < 1) {
        return false;
      }
      break;
    case Op_RoundF:
    case Op_RoundD:
      if (!is_LP64) {
        return false;
      }
      break;
    case Op_CopySignD:
    case Op_CopySignF:
      if (UseAVX < 3 || !is_LP64)  {
        return false;
      }
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
#ifndef _LP64
    case Op_AddReductionVF:
    case Op_AddReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVD:
      if (UseSSE < 1) { // requires at least SSE
        return false;
      }
      break;
    case Op_MulAddVS2VI:
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2) {
        return false;
      }
      break;
#endif // !LP64
    case Op_CompressBits:
      if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) {
        return false;
      }
      break;
    case Op_ExpandBits:
      if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) {
        return false;
      }
      break;
    case Op_SignumF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SignumD:
      if (UseSSE < 2) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SqrtF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SqrtD:
#ifdef _LP64
      if (UseSSE < 2) {
        return false;
      }
#else
      // x86_32.ad has a special match rule for SqrtD.
      // Together with common x86 rules, this handles all UseSSE cases.
#endif
      break;
    case Op_ConvF2HF:
    case Op_ConvHF2F:
      if (!VM_Version::supports_float16()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) {
        return false;
      }
      break;
  }
  return true;  // Match rules are supported by default.
}

//------------------------------------------------------------------------

// Same predicate as is_vector_popcount_predicate() in the source_hpp
// block: native vector popcount needs AVX512_BITALG for subword types and
// AVX512_VPOPCNTDQ for int/long.
static inline bool is_pop_count_instr_target(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

// Auto-vectorization uses the same support matrix as explicit vector API
// nodes on x86.
bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) {
  return match_rule_supported_vector(opcode, vlen, bt);
}

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
// Refine match_rule_supported() for vector nodes with the vector length
// (vlen, in elements) and element type (bt): rejects combinations the
// encodings in this file cannot handle even though the opcode itself is
// supported. Opcodes not listed are supported by default.
bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
  //   * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
  //   * 128bit vroundpd instruction is present only in AVX1
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  switch (opcode) {
    case Op_AbsVF:
    case Op_NegVF:
      if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vandps and vxorps are not available
      }
      break;
    case Op_AbsVD:
    case Op_NegVD:
      if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
        return false; // 512bit vpmullq, vandpd and vxorpd are not available
      }
      break;
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false;
      } // fallthrough
    case Op_MacroLogicV:
      if (!VM_Version::supports_evex() ||
          ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
        return false;
      }
      break;
    case Op_ClearArray:
    case Op_VectorMaskGen:
    case Op_VectorCmpMasked:
      if (!is_LP64 || !VM_Version::supports_avx512bw()) {
        return false;
      }
      if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
      if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) {
        return false;
      }
      break;
    case Op_UMinV:
    case Op_UMaxV:
      if (UseAVX == 0) {
        return false;
      }
      break;
    case Op_MaxV:
    case Op_MinV:
      if (UseSSE < 4 && is_integral_type(bt)) {
        return false;
      }
      if ((bt == T_FLOAT || bt == T_DOUBLE)) {
        // Float/Double intrinsics are enabled for AVX family currently.
        if (UseAVX == 0) {
          return false;
        }
        if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ
          return false;
        }
      }
      break;
    case Op_CallLeafVector:
      if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) {
        return false;
      }
      break;
    case Op_AddReductionVI:
      if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) {
        return false;
      }
      // fallthrough
    case Op_AndReductionV:
    case Op_OrReductionV:
    case Op_XorReductionV:
      if (is_subword_type(bt) && (UseSSE < 4)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
#ifndef _LP64
    case Op_VectorInsert:
      if (bt == T_LONG || bt == T_DOUBLE) {
        return false;
      }
      break;
#endif
    case Op_MinReductionV:
    case Op_MaxReductionV:
      if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) {
        return false;
      } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) {
        return false;
      }
      // Float/Double intrinsics enabled for AVX family.
      if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) {
        return false;
      }
      if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) {
        return false;
      }
#ifndef _LP64
      if (bt == T_BYTE || bt == T_LONG) {
        return false;
      }
#endif
      break;
    case Op_VectorTest:
      if (UseSSE < 4) {
        return false; // Implementation limitation
      } else if (size_in_bits < 32) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if(vlen == 2) {
        return false; // Implementation limitation due to how shuffle is loaded
      } else if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_VectorLoadMask:
    case Op_VectorMaskCast:
      if (size_in_bits == 256 && UseAVX < 2) {
        return false; // Implementation limitation
      }
      // fallthrough
    case Op_VectorStoreMask:
      if (vlen == 2) {
        return false; // Implementation limitation
      }
      break;
    case Op_PopulateIndex:
      if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_VectorCastB2X:
    case Op_VectorCastS2X:
    case Op_VectorCastI2X:
      if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) {
        return false;
      }
      break;
    case Op_VectorCastL2X:
      if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
        return false;
      } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X: {
        // As per JLS section 5.1.3 narrowing conversion to sub-word types
        // happen after intermediate conversion to integer and special handling
        // code needs AVX2 vpcmpeqd instruction for 256 bit vectors.
        int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte;
        if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) {
          return false;
        }
      }
      // fallthrough
    case Op_VectorCastD2X:
      if (bt == T_LONG && !VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2HF:
    case Op_VectorCastHF2F:
      if (!VM_Version::supports_f16c() &&
         ((!VM_Version::supports_evex() ||
         ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) {
        return false;
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
      if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      if (is_subword_type(bt) &&
         (!is_LP64 ||
         (size_in_bits > 256 && !VM_Version::supports_avx512bw()) ||
         (size_in_bits < 64) ||
         (bt == T_SHORT && !VM_Version::supports_bmi2()))) {
        return false;
      }
      break;
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (!is_subword_type(bt) && size_in_bits == 64) {
        return false;
      }
      if (is_subword_type(bt) && size_in_bits < 64) {
        return false;
      }
      break;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (UseAVX < 1) {
        return false; // Implementation limitation
      }
      if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_SelectFromTwoVector:
      if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) {
        return false;
      }
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false;
      }
      if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) {
        return false;
      }
      if (size_in_bits < 128 ) {
        return false;
      }
      // NOTE(review): no break here, so Op_CompressV/Op_ExpandV fall into
      // the Op_VectorLongToMask checks below. Unlike every other
      // intentional fallthrough in this switch, this one is not marked
      // "// fallthrough" -- confirm whether the break is missing.
    case Op_VectorLongToMask:
      if (UseAVX < 1 || !is_LP64) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_SignumVD:
    case Op_SignumVF:
      if (UseAVX < 1) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
        if (!is_pop_count_instr_target(bt) &&
            (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
          return false;
        }
      }
      break;
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true;  // Per default match rules are supported.
}

bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // ADLC based match_rule_supported routine checks for the existence of pattern based
  // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes
  // of their non-masked counterpart with mask edge being the differentiator.
  // This routine does a strict check on the existence of masked operation patterns
  // by returning a default false value for all the other opcodes apart from the
  // ones whose masked instruction patterns are defined in this file.
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
  // Masked (AVX-512 opmask) forms need AVX512VL for sub-512-bit vectors.
  if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
    return false;
  }
  switch(opcode) {
    // Unary masked operations
    case Op_AbsVB:
    case Op_AbsVS:
      if(!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
    case Op_AbsVI:
    case Op_AbsVL:
      return true;

    // Ternary masked operations
    case Op_FmaVF:
    case Op_FmaVD:
      return true;

    case Op_MacroLogicV:
      if(bt != T_INT && bt != T_LONG) {
        return false;
      }
      return true;

    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
    case Op_SubVB:
    case Op_SubVS:
    case Op_MulVS:
    case Op_LShiftVS:
    case Op_RShiftVS:
    case Op_URShiftVS:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512bw()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_MulVL:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (!VM_Version::supports_avx512dq()) {
        return false;  // Implementation limitation
      }
      return true;

    case Op_AndV:
    case Op_OrV:
    case Op_XorV:
    case Op_RotateRightV:
    case Op_RotateLeftV:
      if (bt != T_INT && bt != T_LONG) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorLoadMask:
      assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      return true;

    case Op_AddVI:
    case Op_AddVL:
    case Op_AddVF:
    case Op_AddVD:
    case Op_SubVI:
    case Op_SubVL:
    case Op_SubVF:
    case Op_SubVD:
    case Op_MulVI:
    case Op_MulVF:
    case Op_MulVD:
    case Op_DivVF:
    case Op_DivVD:
    case Op_SqrtVF:
    case Op_SqrtVD:
    case Op_LShiftVI:
    case Op_LShiftVL:
    case Op_RShiftVI:
    case Op_RShiftVL:
    case Op_URShiftVI:
    case Op_URShiftVL:
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
      return true;

    case Op_UMinV:
    case Op_UMaxV:
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      } // fallthrough
    case Op_MaxV:
    case Op_MinV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (is_floating_point_type(bt)) {
        return false; // Implementation limitation
      }
      return true;
    case Op_SaturatingAddV:
    case Op_SaturatingSubV:
      if (!is_subword_type(bt)) {
        return false;
      }
      if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorMaskCmp:
      if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) {
        return true;
      }
    default:
      return false;
  }
}

// x86 never requires partial-vector (predicated tail) operations.
bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) {
  return false;
}

// Return true if Vector::rearrange needs preparation of the shuffle argument
bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) {
  switch (elem_bt) {
    case T_BYTE:  return false;
    case T_SHORT: return !VM_Version::supports_avx512bw();
    case T_INT:   return !VM_Version::supports_avx();
    case T_LONG:  return vlen < 8 && !VM_Version::supports_avx512vl();
    default:
      ShouldNotReachHere();
      return false;
  }
}

// Replace a generic vector operand (VEC/LEGVEC) with the concrete operand
// class matching the ideal register size; LEGVEC maps to the legacy
// (xmm0-15) operand classes.
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
  assert(Matcher::is_generic_vector(generic_opnd), "not generic");
  bool legacy = (generic_opnd->opcode() == LEGVEC);
  if (!VM_Version::supports_avx512vlbwdq() && // KNL
      is_temp && !legacy && (ideal_reg == Op_VecZ)) {
    // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
    return new legVecZOper();
  }
  if (legacy) {
    switch (ideal_reg) {
      case Op_VecS: return new legVecSOper();
      case Op_VecD: return new legVecDOper();
      case Op_VecX: return new legVecXOper();
      case Op_VecY: return new legVecYOper();
      case Op_VecZ: return new legVecZOper();
    }
  } else {
    switch (ideal_reg) {
      case Op_VecS: return new vecSOper();
      case Op_VecD: return new vecDOper();
      case Op_VecX: return new vecXOper();
      case Op_VecY: return new vecYOper();
      case Op_VecZ: return new vecZOper();
    }
  }
  ShouldNotReachHere();
  return nullptr;
}

// True for mach nodes that are pure register-to-register moves (used by
// the register allocator to coalesce/elide them).
bool Matcher::is_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
    case MoveF2VL_rule:
    case MoveF2LEG_rule:
    case MoveVL2F_rule:
    case MoveLEG2F_rule:
    case MoveD2VL_rule:
    case MoveD2LEG_rule:
    case MoveVL2D_rule:
    case MoveLEG2D_rule:
      return true;
    default:
      return false;
  }
}

// True for the generic vector operand classes specialized by
// pd_specialize_generic_vector_operand() above.
bool Matcher::is_generic_vector(MachOper* opnd) {
  switch (opnd->opcode()) {
    case VEC:
    case LEGVEC:
      return true;
    default:
      return false;
  }
}

//------------------------------------------------------------------------

// Register mask for AVX-512 opmask (k) predicate registers.
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_VECTMASK_REG_mask;
}

// Max vector size in bytes. 0 if not supported.
int Matcher::vector_width_in_bytes(BasicType bt) {
  assert(is_java_primitive(bt), "only primitive type vectors");
  if (UseSSE < 2) return 0;
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX2/EVEX supports 512bit vectors for all types.
  int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
    size = (UseAVX > 2) ? 64 : 32;
  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
  // Use flag to limit vector size.
  size = MIN2(size,(int)MaxVectorSize);
  // Minimum 2 values in vector (or 4 for bytes).
  switch (bt) {
  case T_DOUBLE:
  case T_LONG:
    if (size < 16) return 0;
    break;
  case T_FLOAT:
  case T_INT:
    if (size < 8) return 0;
    break;
  case T_BOOLEAN:
    if (size < 4) return 0;
    break;
  case T_CHAR:
    if (size < 4) return 0;
    break;
  case T_BYTE:
    if (size < 4) return 0;
    break;
  case T_SHORT:
    if (size < 4) return 0;
    break;
  default:
    ShouldNotReachHere();
  }
  return size;
}

// Limits on vector size (number of elements) loaded into vector.
int Matcher::max_vector_size(const BasicType bt) {
  return vector_width_in_bytes(bt)/type2aelembytes(bt);
}
int Matcher::min_vector_size(const BasicType bt) {
  int max_size = max_vector_size(bt);
  // Min size which can be loaded into vector is 4 bytes.
  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
  // Support for calling svml double64 vectors
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size,max_size);
}

int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
  // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake
  if (VM_Version::is_default_intel_cascade_lake()) {
    return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
  }
  return Matcher::max_vector_size(bt);
}

// No scalable (variable-width) vector registers on x86.
int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypass
    // ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, and such that it can be matched
// with BMI instructions like blsi, blsr, etc.
// Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
template<typename ConType>
class FusedPatternMatcher {
  Node* _op1_node;
  Node* _mop_node;
  int _con_op;

  static int match_next(Node* n, int next_op, int next_op_idx) {
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }

 public:
  FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
    _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }

  bool match(int op1, int op1_op2_idx,  // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
             int op2, int op2_con_idx,  // op2 and the index of the op2->con edge, -1 if op2 is commutative
             typename ConType::NativeType con_value) {
    if (_op1_node->Opcode() != op1) {
      return false;
    }
    if (_mop_node->outcnt() > 2)
{ 2462 return false; 2463 } 2464 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2465 if (op1_op2_idx == -1) { 2466 return false; 2467 } 2468 // Memory operation must be the other edge 2469 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2470 2471 // Check that the mop node is really what we want 2472 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2473 Node* op2_node = _op1_node->in(op1_op2_idx); 2474 if (op2_node->outcnt() > 1) { 2475 return false; 2476 } 2477 assert(op2_node->Opcode() == op2, "Should be"); 2478 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2479 if (op2_con_idx == -1) { 2480 return false; 2481 } 2482 // Memory operation must be the other edge 2483 int op2_mop_idx = (op2_con_idx & 1) + 1; 2484 // Check that the memory operation is the same node 2485 if (op2_node->in(op2_mop_idx) == _mop_node) { 2486 // Now check the constant 2487 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2488 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2489 return true; 2490 } 2491 } 2492 } 2493 return false; 2494 } 2495 }; 2496 2497 static bool is_bmi_pattern(Node* n, Node* m) { 2498 assert(UseBMI1Instructions, "sanity"); 2499 if (n != nullptr && m != nullptr) { 2500 if (m->Opcode() == Op_LoadI) { 2501 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2502 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2503 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2504 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2505 } else if (m->Opcode() == Op_LoadL) { 2506 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2507 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2508 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2509 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2510 } 2511 } 2512 return false; 2513 } 2514 2515 // Should the matcher clone input 'm' of node 'n'? 2516 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2517 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2518 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2519 mstack.push(m, Visit); 2520 return true; 2521 } 2522 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2523 mstack.push(m, Visit); // m = ShiftCntV 2524 return true; 2525 } 2526 if (is_encode_and_store_pattern(n, m)) { 2527 mstack.push(m, Visit); 2528 return true; 2529 } 2530 return false; 2531 } 2532 2533 // Should the Matcher clone shifts on addressing modes, expecting them 2534 // to be subsumed into complex addressing expressions or compute them 2535 // into registers? 2536 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2537 Node *off = m->in(AddPNode::Offset); 2538 if (off->is_Con()) { 2539 address_visited.test_set(m->_idx); // Flag as address_visited 2540 Node *adr = m->in(AddPNode::Address); 2541 2542 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2543 // AtomicAdd is not an addressing expression. 2544 // Cheap to find it by looking for screwy base. 2545 if (adr->is_AddP() && 2546 !adr->in(AddPNode::Base)->is_top() && 2547 !adr->in(AddPNode::Offset)->is_Con() && 2548 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2549 // Are there other uses besides address expressions? 
2550 !is_visited(adr)) { 2551 address_visited.set(adr->_idx); // Flag as address_visited 2552 Node *shift = adr->in(AddPNode::Offset); 2553 if (!clone_shift(shift, this, mstack, address_visited)) { 2554 mstack.push(shift, Pre_Visit); 2555 } 2556 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2557 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2558 } else { 2559 mstack.push(adr, Pre_Visit); 2560 } 2561 2562 // Clone X+offset as it also folds into most addressing expressions 2563 mstack.push(off, Visit); 2564 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2565 return true; 2566 } else if (clone_shift(off, this, mstack, address_visited)) { 2567 address_visited.test_set(m->_idx); // Flag as address_visited 2568 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2569 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2570 return true; 2571 } 2572 return false; 2573 } 2574 2575 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2576 switch (bt) { 2577 case BoolTest::eq: 2578 return Assembler::eq; 2579 case BoolTest::ne: 2580 return Assembler::neq; 2581 case BoolTest::le: 2582 case BoolTest::ule: 2583 return Assembler::le; 2584 case BoolTest::ge: 2585 case BoolTest::uge: 2586 return Assembler::nlt; 2587 case BoolTest::lt: 2588 case BoolTest::ult: 2589 return Assembler::lt; 2590 case BoolTest::gt: 2591 case BoolTest::ugt: 2592 return Assembler::nle; 2593 default : ShouldNotReachHere(); return Assembler::_false; 2594 } 2595 } 2596 2597 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2598 switch (bt) { 2599 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2600 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2601 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2602 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2603 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2604 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2605 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2606 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2607 } 2608 } 2609 2610 // Helper methods for MachSpillCopyNode::implementation(). 2611 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2612 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2613 assert(ireg == Op_VecS || // 32bit vector 2614 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2615 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2616 "no non-adjacent vector moves" ); 2617 if (masm) { 2618 switch (ireg) { 2619 case Op_VecS: // copy whole register 2620 case Op_VecD: 2621 case Op_VecX: 2622 #ifndef _LP64 2623 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2624 #else 2625 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2626 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2627 } else { 2628 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2629 } 2630 #endif 2631 break; 2632 case Op_VecY: 2633 #ifndef _LP64 2634 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2635 #else 2636 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2637 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2638 } else { 2639 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2640 } 2641 #endif 2642 break; 2643 case Op_VecZ: 2644 __ 
evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2645 break; 2646 default: 2647 ShouldNotReachHere(); 2648 } 2649 #ifndef PRODUCT 2650 } else { 2651 switch (ireg) { 2652 case Op_VecS: 2653 case Op_VecD: 2654 case Op_VecX: 2655 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2656 break; 2657 case Op_VecY: 2658 case Op_VecZ: 2659 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2660 break; 2661 default: 2662 ShouldNotReachHere(); 2663 } 2664 #endif 2665 } 2666 } 2667 2668 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2669 int stack_offset, int reg, uint ireg, outputStream* st) { 2670 if (masm) { 2671 if (is_load) { 2672 switch (ireg) { 2673 case Op_VecS: 2674 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2675 break; 2676 case Op_VecD: 2677 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2678 break; 2679 case Op_VecX: 2680 #ifndef _LP64 2681 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2682 #else 2683 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2684 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2685 } else { 2686 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2687 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2688 } 2689 #endif 2690 break; 2691 case Op_VecY: 2692 #ifndef _LP64 2693 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2694 #else 2695 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2696 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2697 } else { 2698 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), 
as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2699 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2700 } 2701 #endif 2702 break; 2703 case Op_VecZ: 2704 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2705 break; 2706 default: 2707 ShouldNotReachHere(); 2708 } 2709 } else { // store 2710 switch (ireg) { 2711 case Op_VecS: 2712 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2713 break; 2714 case Op_VecD: 2715 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2716 break; 2717 case Op_VecX: 2718 #ifndef _LP64 2719 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2720 #else 2721 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2722 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2723 } 2724 else { 2725 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2726 } 2727 #endif 2728 break; 2729 case Op_VecY: 2730 #ifndef _LP64 2731 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2732 #else 2733 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2734 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2735 } 2736 else { 2737 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2738 } 2739 #endif 2740 break; 2741 case Op_VecZ: 2742 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2743 break; 2744 default: 2745 ShouldNotReachHere(); 2746 } 2747 } 2748 #ifndef PRODUCT 2749 } else { 2750 if (is_load) { 2751 switch (ireg) { 2752 case Op_VecS: 2753 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2754 break; 2755 case Op_VecD: 2756 st->print("movq %s,[rsp + %d]\t# spill", 
Matcher::regName[reg], stack_offset); 2757 break; 2758 case Op_VecX: 2759 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2760 break; 2761 case Op_VecY: 2762 case Op_VecZ: 2763 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2764 break; 2765 default: 2766 ShouldNotReachHere(); 2767 } 2768 } else { // store 2769 switch (ireg) { 2770 case Op_VecS: 2771 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2772 break; 2773 case Op_VecD: 2774 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2775 break; 2776 case Op_VecX: 2777 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2778 break; 2779 case Op_VecY: 2780 case Op_VecZ: 2781 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2782 break; 2783 default: 2784 ShouldNotReachHere(); 2785 } 2786 } 2787 #endif 2788 } 2789 } 2790 2791 template <class T> 2792 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2793 int size = type2aelembytes(bt) * len; 2794 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, size, 0); 2795 for (int i = 0; i < len; i++) { 2796 int offset = i * type2aelembytes(bt); 2797 switch (bt) { 2798 case T_BYTE: val->at(i) = con; break; 2799 case T_SHORT: { 2800 jshort c = con; 2801 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2802 break; 2803 } 2804 case T_INT: { 2805 jint c = con; 2806 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2807 break; 2808 } 2809 case T_LONG: { 2810 jlong c = con; 2811 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2812 break; 2813 } 2814 case T_FLOAT: { 2815 jfloat c = con; 2816 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2817 break; 2818 } 2819 case T_DOUBLE: { 2820 jdouble c = con; 2821 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2822 break; 2823 } 2824 default: assert(false, "%s", type2name(bt)); 2825 } 2826 } 2827 return val; 2828 } 2829 
// Returns a jlong whose every element (of type 'bt') has only its sign
// bit set, replicated across the full 64 bits.
static inline jlong high_bit_set(BasicType bt) {
  switch (bt) {
    case T_BYTE: return 0x8080808080808080;
    case T_SHORT: return 0x8000800080008000;
    case T_INT: return 0x8000000080000000;
    case T_LONG: return 0x8000000000000000;
    default:
      ShouldNotReachHere();
      return 0;
  }
}

#ifndef PRODUCT
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("nop \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emits '_count' one-byte nops (padding for loops and call sites).
void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const {
  __ nop(_count);
}

uint MachNopNode::size(PhaseRegAlloc*) const {
  return _count;
}

#ifndef PRODUCT
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
  st->print("# breakpoint");
}
#endif

// Emits an int3 software breakpoint.
void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const {
  __ int3();
}

uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}

%}

encode %{

  // Code emitted after a call returns: stack-depth verification and
  // inline-type (Valhalla) return-value post-processing.
  enc_class call_epilog %{
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
      // The last return value is not set by the callee but used to pass IsInit information to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}

%}

// Operands for bound floating pointer register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecZ() %{
  constraint(ALLOC_IN_RC(vectorz_reg_legacy));
  match(VecZ);

  format %{ %}
  interface(REG_INTER);
%}

// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)

// ============================================================================

instruct ShouldNotReachHere() %{
  match(Halt);
  format %{ "stop\t# ShouldNotReachHere" %}
  ins_encode %{
    if (is_reachable()) {
      __ stop(_halt_reason);
    }
  %}
  ins_pipe(pipe_slow);
%}

// ============================================================================

// Scalar float/double add/sub/mul. Each operation has two families:
// two-operand SSE forms (predicate UseAVX == 0, destructive dst) and
// three-operand AVX forms (predicate UseAVX > 0, non-destructive dst),
// each with register, memory and constant-table operand variants.

instruct addF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst src));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst (LoadF src)));

  format %{ "addss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AddF dst con));
  format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 src2));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "vaddss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddF src con));

  format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst src));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst (LoadD src)));

  format %{ "addsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AddD dst con));
  format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ addsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 src2));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src1 (LoadD src2)));

  format %{ "vaddsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct addD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (AddD src con));

  format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst src));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst (LoadF src)));

  format %{ "subss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (SubF dst con));
  format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 src2));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src1 (LoadF src2)));

  format %{ "vsubss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubF src con));

  format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst src));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst (LoadD src)));

  format %{ "subsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (SubD dst con));
  format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ subsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 src2));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src1 (LoadD src2)));

  format %{ "vsubsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct subD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (SubD src con));

  format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg(regF dst, regF src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst src));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_mem(regF dst, memory src) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst (LoadF src)));

  format %{ "mulss $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_imm(regF dst, immF con) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (MulF dst con));
  format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulss($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 src2));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "vmulss $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulF_reg_imm(regF dst, regF src, immF con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulF src con));

  format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg(regD dst, regD src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst src));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_mem(regD dst, memory src) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst (LoadD src)));

  format %{ "mulsd $dst, $src" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $src$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_imm(regD dst, immD con) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (MulD dst con));
  format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ mulsd($dst$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 src2));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src1 (LoadD src2)));

  format %{ "vmulsd $dst, $src1, $src2" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
  %}
  ins_pipe(pipe_slow);
%}

instruct mulD_reg_imm(regD dst, regD src, immD con) %{
  predicate(UseAVX > 0);
  match(Set dst (MulD src con));

  format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}

instruct divF_reg(regF dst, regF src) %{
predicate((UseSSE>=1) && (UseAVX == 0)); 3502 match(Set dst (DivF dst src)); 3503 3504 format %{ "divss $dst, $src" %} 3505 ins_cost(150); 3506 ins_encode %{ 3507 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3508 %} 3509 ins_pipe(pipe_slow); 3510 %} 3511 3512 instruct divF_mem(regF dst, memory src) %{ 3513 predicate((UseSSE>=1) && (UseAVX == 0)); 3514 match(Set dst (DivF dst (LoadF src))); 3515 3516 format %{ "divss $dst, $src" %} 3517 ins_cost(150); 3518 ins_encode %{ 3519 __ divss($dst$$XMMRegister, $src$$Address); 3520 %} 3521 ins_pipe(pipe_slow); 3522 %} 3523 3524 instruct divF_imm(regF dst, immF con) %{ 3525 predicate((UseSSE>=1) && (UseAVX == 0)); 3526 match(Set dst (DivF dst con)); 3527 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3528 ins_cost(150); 3529 ins_encode %{ 3530 __ divss($dst$$XMMRegister, $constantaddress($con)); 3531 %} 3532 ins_pipe(pipe_slow); 3533 %} 3534 3535 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3536 predicate(UseAVX > 0); 3537 match(Set dst (DivF src1 src2)); 3538 3539 format %{ "vdivss $dst, $src1, $src2" %} 3540 ins_cost(150); 3541 ins_encode %{ 3542 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3543 %} 3544 ins_pipe(pipe_slow); 3545 %} 3546 3547 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3548 predicate(UseAVX > 0); 3549 match(Set dst (DivF src1 (LoadF src2))); 3550 3551 format %{ "vdivss $dst, $src1, $src2" %} 3552 ins_cost(150); 3553 ins_encode %{ 3554 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3555 %} 3556 ins_pipe(pipe_slow); 3557 %} 3558 3559 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3560 predicate(UseAVX > 0); 3561 match(Set dst (DivF src con)); 3562 3563 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3564 ins_cost(150); 3565 ins_encode %{ 3566 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3567 %} 3568 ins_pipe(pipe_slow); 
3569 %} 3570 3571 instruct divD_reg(regD dst, regD src) %{ 3572 predicate((UseSSE>=2) && (UseAVX == 0)); 3573 match(Set dst (DivD dst src)); 3574 3575 format %{ "divsd $dst, $src" %} 3576 ins_cost(150); 3577 ins_encode %{ 3578 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3579 %} 3580 ins_pipe(pipe_slow); 3581 %} 3582 3583 instruct divD_mem(regD dst, memory src) %{ 3584 predicate((UseSSE>=2) && (UseAVX == 0)); 3585 match(Set dst (DivD dst (LoadD src))); 3586 3587 format %{ "divsd $dst, $src" %} 3588 ins_cost(150); 3589 ins_encode %{ 3590 __ divsd($dst$$XMMRegister, $src$$Address); 3591 %} 3592 ins_pipe(pipe_slow); 3593 %} 3594 3595 instruct divD_imm(regD dst, immD con) %{ 3596 predicate((UseSSE>=2) && (UseAVX == 0)); 3597 match(Set dst (DivD dst con)); 3598 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3599 ins_cost(150); 3600 ins_encode %{ 3601 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3602 %} 3603 ins_pipe(pipe_slow); 3604 %} 3605 3606 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3607 predicate(UseAVX > 0); 3608 match(Set dst (DivD src1 src2)); 3609 3610 format %{ "vdivsd $dst, $src1, $src2" %} 3611 ins_cost(150); 3612 ins_encode %{ 3613 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3614 %} 3615 ins_pipe(pipe_slow); 3616 %} 3617 3618 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3619 predicate(UseAVX > 0); 3620 match(Set dst (DivD src1 (LoadD src2))); 3621 3622 format %{ "vdivsd $dst, $src1, $src2" %} 3623 ins_cost(150); 3624 ins_encode %{ 3625 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3626 %} 3627 ins_pipe(pipe_slow); 3628 %} 3629 3630 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3631 predicate(UseAVX > 0); 3632 match(Set dst (DivD src con)); 3633 3634 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3635 ins_cost(150); 3636 ins_encode %{ 3637 __ vdivsd($dst$$XMMRegister, 
$src$$XMMRegister, $constantaddress($con));
3638   %}
3639   ins_pipe(pipe_slow);
3640 %}
3641
// ---------------- Scalar FP abs / neg / sqrt ----------------
// abs is implemented by AND-masking away the sign bit, neg by XOR-flipping
// it; the bit-mask constants come from the external address table
// (float_signmask / double_signmask / float_signflip / double_signflip).
3642 instruct absF_reg(regF dst) %{
3643   predicate((UseSSE>=1) && (UseAVX == 0));
3644   match(Set dst (AbsF dst));
3645   ins_cost(150);
3646   format %{ "andps   $dst, [0x7fffffff]\t# abs float by sign masking" %}
3647   ins_encode %{
3648     __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
3649   %}
3650   ins_pipe(pipe_slow);
3651 %}
3652
// AVX non-destructive variant; operates at 128-bit vector length.
3653 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
3654   predicate(UseAVX > 0);
3655   match(Set dst (AbsF src));
3656   ins_cost(150);
3657   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
3658   ins_encode %{
3659     int vlen_enc = Assembler::AVX_128bit;
3660     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
3661               ExternalAddress(float_signmask()), vlen_enc);
3662   %}
3663   ins_pipe(pipe_slow);
3664 %}
3665
3666 instruct absD_reg(regD dst) %{
3667   predicate((UseSSE>=2) && (UseAVX == 0));
3668   match(Set dst (AbsD dst));
3669   ins_cost(150);
3670   format %{ "andpd   $dst, [0x7fffffffffffffff]\t"
3671             "# abs double by sign masking" %}
3672   ins_encode %{
3673     __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
3674   %}
3675   ins_pipe(pipe_slow);
3676 %}
3677
3678 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
3679   predicate(UseAVX > 0);
3680   match(Set dst (AbsD src));
3681   ins_cost(150);
3682   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
3683             "# abs double by sign masking" %}
3684   ins_encode %{
3685     int vlen_enc = Assembler::AVX_128bit;
3686     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
3687               ExternalAddress(double_signmask()), vlen_enc);
3688   %}
3689   ins_pipe(pipe_slow);
3690 %}
3691
3692 instruct negF_reg(regF dst) %{
3693   predicate((UseSSE>=1) && (UseAVX == 0));
3694   match(Set dst (NegF dst));
3695   ins_cost(150);
3696   format %{ "xorps   $dst, [0x80000000]\t# neg float by sign flipping" %}
3697   ins_encode %{
3698     __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
3699   %}
3700   ins_pipe(pipe_slow);
3701 %}
3702
3703 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
3704   predicate(UseAVX > 0);
3705   match(Set dst (NegF src));
3706   ins_cost(150);
3707   format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
3708   ins_encode %{
3709     __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
3710                  ExternalAddress(float_signflip()));
3711   %}
3712   ins_pipe(pipe_slow);
3713 %}
3714
3715 instruct negD_reg(regD dst) %{
3716   predicate((UseSSE>=2) && (UseAVX == 0));
3717   match(Set dst (NegD dst));
3718   ins_cost(150);
3719   format %{ "xorpd   $dst, [0x8000000000000000]\t"
3720             "# neg double by sign flipping" %}
3721   ins_encode %{
3722     __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
3723   %}
3724   ins_pipe(pipe_slow);
3725 %}
3726
3727 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
3728   predicate(UseAVX > 0);
3729   match(Set dst (NegD src));
3730   ins_cost(150);
3731   format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
3732             "# neg double by sign flipping" %}
3733   ins_encode %{
3734     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
3735                  ExternalAddress(double_signflip()));
3736   %}
3737   ins_pipe(pipe_slow);
3738 %}
3739
3740 // sqrtss instruction needs destination register to be pre initialized for best performance
3741 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
3742 instruct sqrtF_reg(regF dst) %{
3743   predicate(UseSSE>=1);
3744   match(Set dst (SqrtF dst));
3745   format %{ "sqrtss $dst, $dst" %}
3746   ins_encode %{
3747     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3748   %}
3749   ins_pipe(pipe_slow);
3750 %}
3751
3752 // sqrtsd instruction needs destination register to be pre initialized for best performance
3753 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
3754 instruct sqrtD_reg(regD dst) %{
3755   predicate(UseSSE>=2);
3756   match(Set dst (SqrtD dst));
3757   format %{ "sqrtsd  $dst, $dst" %}
3758   ins_encode %{
3759     __
sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3760   %}
3761   ins_pipe(pipe_slow);
3762 %}
3763
// ---------------- float <-> half-float (FP16) conversions ----------------
// Scalar conversions go through flt_to_flt16 / flt16_to_flt macro-assembler
// helpers; vector conversions use vcvtps2ph / vcvtph2ps directly.
3764 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
3765   effect(TEMP tmp);
3766   match(Set dst (ConvF2HF src));
3767   ins_cost(125);
3768   format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%}
3769   ins_encode %{
3770     __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
3771   %}
3772   ins_pipe( pipe_slow );
3773 %}
3774
// Store a converted half-float straight to memory. Uses a one-lane opmask
// ($ktmp loaded with 0x1) so evcvtps2ph writes only the low 16-bit element.
3775 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
3776   predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
3777   effect(TEMP ktmp, TEMP rtmp);
3778   match(Set mem (StoreC mem (ConvF2HF src)));
3779   format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
3780   ins_encode %{
3781     __ movl($rtmp$$Register, 0x1);
3782     __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
3783     __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
3784   %}
3785   ins_pipe( pipe_slow );
3786 %}
3787
3788 instruct vconvF2HF(vec dst, vec src) %{
3789   match(Set dst (VectorCastF2HF src));
3790   format %{ "vector_conv_F2HF $dst $src" %}
3791   ins_encode %{
3792     int vlen_enc = vector_length_encoding(this, $src);
3793     __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
3794   %}
3795   ins_pipe( pipe_slow );
3796 %}
3797
// Vector F->HF conversion stored directly to memory (16+ byte stores only).
3798 instruct vconvF2HF_mem_reg(memory mem, vec src) %{
3799   predicate(n->as_StoreVector()->memory_size() >= 16);
3800   match(Set mem (StoreVector mem (VectorCastF2HF src)));
3801   format %{ "vcvtps2ph $mem,$src" %}
3802   ins_encode %{
3803     int vlen_enc = vector_length_encoding(this, $src);
3804     __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
3805   %}
3806   ins_pipe( pipe_slow );
3807 %}
3808
3809 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
3810   match(Set dst (ConvHF2F src));
3811   format %{ "vcvtph2ps $dst,$src" %}
3812   ins_encode %{
3813     __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
3814   %}
3815   ins_pipe( pipe_slow );
3816 %}
3817
// Vector HF->F conversion with the source folded from memory.
3818 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
3819   match(Set dst (VectorCastHF2F (LoadVector mem)));
3820   format %{ "vcvtph2ps $dst,$mem" %}
3821   ins_encode %{
3822     int vlen_enc = vector_length_encoding(this);
3823     __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
3824   %}
3825   ins_pipe( pipe_slow );
3826 %}
3827
3828 instruct vconvHF2F(vec dst, vec src) %{
3829   match(Set dst (VectorCastHF2F src));
3830   ins_cost(125);
3831   format %{ "vector_conv_HF2F $dst,$src" %}
3832   ins_encode %{
3833     int vlen_enc = vector_length_encoding(this);
3834     __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
3835   %}
3836   ins_pipe( pipe_slow );
3837 %}
3838
3839 // ---------------------------------------- VectorReinterpret ------------------------------------
// Same-length mask reinterpret: dst is also the source, nothing to emit.
3840 instruct reinterpret_mask(kReg dst) %{
3841   predicate(n->bottom_type()->isa_vectmask() &&
3842             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
3843   match(Set dst (VectorReinterpret dst));
3844   ins_cost(125);
3845   format %{ "vector_reinterpret $dst\t!" %}
3846   ins_encode %{
3847     // empty
3848   %}
3849   ins_pipe( pipe_slow );
3850 %}
3851
// Mask reinterpret SHORT->BYTE: the mask is materialized into a vector temp
// (evpmovm2w) and then re-extracted at byte granularity (evpmovb2m).
3852 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
3853   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3854             n->bottom_type()->isa_vectmask() &&
3855             n->in(1)->bottom_type()->isa_vectmask() &&
3856             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
3857             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3858   match(Set dst (VectorReinterpret src));
3859   effect(TEMP xtmp);
3860   format %{ "vector_mask_reinterpret_W2B $dst $src\t!"
%}
3861   ins_encode %{
3862      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
3863      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
       // Reinterpret is only legal when both views cover the same byte size.
3864      assert(src_sz == dst_sz , "src and dst size mismatch");
3865      int vlen_enc = vector_length_encoding(src_sz);
3866      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3867      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3868   %}
3869   ins_pipe( pipe_slow );
3870 %}
3871
// Mask reinterpret INT/FLOAT->BYTE via a vector temp (evpmovm2d/evpmovb2m).
3872 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
3873   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3874             n->bottom_type()->isa_vectmask() &&
3875             n->in(1)->bottom_type()->isa_vectmask() &&
3876             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
3877              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
3878             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3879   match(Set dst (VectorReinterpret src));
3880   effect(TEMP xtmp);
3881   format %{ "vector_mask_reinterpret_D2B $dst $src\t!"
%}
3882   ins_encode %{
3883      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
3884      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3885      assert(src_sz == dst_sz , "src and dst size mismatch");
3886      int vlen_enc = vector_length_encoding(src_sz);
3887      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3888      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3889   %}
3890   ins_pipe( pipe_slow );
3891 %}
3892
// Mask reinterpret LONG/DOUBLE->BYTE via a vector temp (evpmovm2q/evpmovb2m).
3893 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
3894   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3895             n->bottom_type()->isa_vectmask() &&
3896             n->in(1)->bottom_type()->isa_vectmask() &&
3897             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
3898              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
3899             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3900   match(Set dst (VectorReinterpret src));
3901   effect(TEMP xtmp);
3902   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
3903   ins_encode %{
3904      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
3905      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3906      assert(src_sz == dst_sz , "src and dst size mismatch");
3907      int vlen_enc = vector_length_encoding(src_sz);
3908      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3909      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3910   %}
3911   ins_pipe( pipe_slow );
3912 %}
3913
// Same-size vector reinterpret: purely a type change, no code emitted.
3914 instruct reinterpret(vec dst) %{
3915   predicate(!n->bottom_type()->isa_vectmask() &&
3916             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3917   match(Set dst (VectorReinterpret dst));
3918   ins_cost(125);
3919   format %{ "vector_reinterpret $dst\t!"
%}
3920   ins_encode %{
3921     // empty
3922   %}
3923   ins_pipe( pipe_slow );
3924 %}
3925
// Pre-AVX widening reinterpret: zero the high part of dst by AND-ing the
// source with a 32- or 64-bit keep-low mask from the constant area.
3926 instruct reinterpret_expand(vec dst, vec src) %{
3927   predicate(UseAVX == 0 &&
3928             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3929   match(Set dst (VectorReinterpret src));
3930   ins_cost(125);
3931   effect(TEMP dst);
3932   format %{ "vector_reinterpret_expand $dst,$src" %}
3933   ins_encode %{
3934     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
3935     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
3936
3937     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3938     if (src_vlen_in_bytes == 4) {
3939       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
3940     } else {
3941       assert(src_vlen_in_bytes == 8, "");
3942       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
3943     }
3944     __ pand($dst$$XMMRegister, $src$$XMMRegister);
3945   %}
3946   ins_pipe( pipe_slow );
3947 %}
3948
// AVX widening reinterpret of a 4-byte source: one vpand with the 32-bit mask.
3949 instruct vreinterpret_expand4(legVec dst, vec src) %{
3950   predicate(UseAVX > 0 &&
3951             !n->bottom_type()->isa_vectmask() &&
3952             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3953             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3954   match(Set dst (VectorReinterpret src));
3955   ins_cost(125);
3956   format %{ "vector_reinterpret_expand $dst,$src" %}
3957   ins_encode %{
3958     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
3959   %}
3960   ins_pipe( pipe_slow );
3961 %}
3962
3963
// AVX widening reinterpret of an 8/16/32-byte source: the narrower move
// (movq/movdqu/vmovdqu) implicitly zeroes the upper part of dst.
3964 instruct vreinterpret_expand(legVec dst, vec src) %{
3965   predicate(UseAVX > 0 &&
3966             !n->bottom_type()->isa_vectmask() &&
3967             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3968             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3969   match(Set dst (VectorReinterpret src));
3970   ins_cost(125);
3971   format %{
"vector_reinterpret_expand $dst,$src\t!" %}
3972   ins_encode %{
3973     switch (Matcher::vector_length_in_bytes(this, $src)) {
3974       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3975       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3976       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3977       default: ShouldNotReachHere();
3978     }
3979   %}
3980   ins_pipe( pipe_slow );
3981 %}
3982
// Narrowing reinterpret: copy only the destination's width from src
// (the size switch picks the move matching the *destination* length).
3983 instruct reinterpret_shrink(vec dst, legVec src) %{
3984   predicate(!n->bottom_type()->isa_vectmask() &&
3985             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3986   match(Set dst (VectorReinterpret src));
3987   ins_cost(125);
3988   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3989   ins_encode %{
3990     switch (Matcher::vector_length_in_bytes(this)) {
3991       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3992       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3993       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3994       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3995       default: ShouldNotReachHere();
3996     }
3997   %}
3998   ins_pipe( pipe_slow );
3999 %}
4000
4001 // ----------------------------------------------------------------------------------------------------
4002
4003 #ifdef _LP64
// RoundDoubleMode: roundsd with an immediate rounding-mode operand.
4004 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
4005   match(Set dst (RoundDoubleMode src rmode));
4006   format %{ "roundsd $dst,$src" %}
4007   ins_cost(150);
4008   ins_encode %{
4009     assert(UseSSE >= 4, "required");
       // Zero dst first when it differs from src (roundsd only writes the
       // low lanes; NOTE(review): presumably this breaks a dependence on
       // dst's stale upper bits — confirm).
4010     if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) {
4011       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4012     }
4013     __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
4014   %}
4015   ins_pipe(pipe_slow);
4016 %}
4017
4018 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{
4019   match(Set dst (RoundDoubleMode con rmode));
4020   format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
4021   ins_cost(150);
4022   ins_encode %{
4023     assert(UseSSE >= 4, "required");
4024     __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg);
4025   %}
4026   ins_pipe(pipe_slow);
4027 %}
4028
// Packed-double rounding, < 8 lanes: AVX vroundpd.
4029 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
4030   predicate(Matcher::vector_length(n) < 8);
4031   match(Set dst (RoundDoubleModeV src rmode));
4032   format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
4033   ins_encode %{
4034     assert(UseAVX > 0, "required");
4035     int vlen_enc = vector_length_encoding(this);
4036     __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc);
4037   %}
4038   ins_pipe( pipe_slow );
4039 %}
4040
// Packed-double rounding, exactly 8 lanes (512-bit): EVEX vrndscalepd.
4041 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
4042   predicate(Matcher::vector_length(n) == 8);
4043   match(Set dst (RoundDoubleModeV src rmode));
4044   format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
4045   ins_encode %{
4046     assert(UseAVX > 2, "required");
4047     __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
4048   %}
4049   ins_pipe( pipe_slow );
4050 %}
4051
4052 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
4053   predicate(Matcher::vector_length(n) < 8);
4054   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
4055   format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
4056   ins_encode %{
4057     assert(UseAVX > 0, "required");
4058     int vlen_enc = vector_length_encoding(this);
4059     __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc);
4060   %}
4061   ins_pipe( pipe_slow );
4062 %}
4063
4064 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
4065   predicate(Matcher::vector_length(n) == 8);
4066   match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
4067   format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
4068   ins_encode %{
4069     assert(UseAVX > 2, "required");
4070     __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
4071   %}
4072   ins_pipe( pipe_slow );
4073 %}
4074 #endif // _LP64
4075
// OnSpinWait lowers to a single pause instruction.
4076 instruct onspinwait() %{
4077   match(OnSpinWait);
4078   ins_cost(200);
4079
4080   format %{
4081     $$template
4082     $$emit$$"pause\t! membar_onspinwait"
4083   %}
4084   ins_encode %{
4085     __ pause();
4086   %}
4087   ins_pipe(pipe_slow);
4088 %}
4089
4090 // a * b + c
// Fused multiply-add; the accumulator c is both input and result operand.
4091 instruct fmaD_reg(regD a, regD b, regD c) %{
4092   match(Set c (FmaD c (Binary a b)));
4093   format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
4094   ins_cost(150);
4095   ins_encode %{
4096     assert(UseFMA, "Needs FMA instructions support.");
4097     __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
4098   %}
4099   ins_pipe( pipe_slow );
4100 %}
4101
4102 // a * b + c
4103 instruct fmaF_reg(regF a, regF b, regF c) %{
4104   match(Set c (FmaF c (Binary a b)));
4105   format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
4106   ins_cost(150);
4107   ins_encode %{
4108     assert(UseFMA, "Needs FMA instructions support.");
4109     __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
4110   %}
4111   ins_pipe( pipe_slow );
4112 %}
4113
4114 // ====================VECTOR INSTRUCTIONS=====================================
4115
4116 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
4117 instruct MoveVec2Leg(legVec dst, vec src) %{
4118   match(Set dst src);
4119   format %{ "" %}
4120   ins_encode %{
       // Never emitted: these moves exist only for register-class bridging
       // and are removed during post-selection cleanup (see comment above).
4121     ShouldNotReachHere();
4122   %}
4123   ins_pipe( fpu_reg_reg );
4124 %}
4125
4126 instruct MoveLeg2Vec(vec dst, legVec src) %{
4127   match(Set dst src);
4128   format %{ "" %}
4129   ins_encode %{
4130     ShouldNotReachHere();
4131   %}
4132   ins_pipe( fpu_reg_reg );
4133 %}
4134
4135 // ============================================================================
4136
4137 // Load vectors generic operand pattern
4138 instruct loadV(vec dst, memory mem) %{
4139   match(Set dst (LoadVector mem));
4140   ins_cost(125);
4141   format %{ "load_vector $dst,$mem" %}
4142   ins_encode %{
4143     BasicType bt = Matcher::vector_element_basic_type(this);
4144     __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
4145   %}
4146   ins_pipe( pipe_slow );
4147 %}
4148
4149 // Store vectors generic operand pattern.
// Move width is selected from the vector's byte length (4..64 bytes).
4150 instruct storeV(memory mem, vec src) %{
4151   match(Set mem (StoreVector mem src));
4152   ins_cost(145);
4153   format %{ "store_vector $mem,$src\n\t" %}
4154   ins_encode %{
4155     switch (Matcher::vector_length_in_bytes(this, $src)) {
4156       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
4157       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
4158       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
4159       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
4160       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
4161       default: ShouldNotReachHere();
4162     }
4163   %}
4164   ins_pipe( pipe_slow );
4165 %}
4166
4167 // ---------------------------------------- Gather ------------------------------------
4168
4169 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE
4170
// AVX2-style gather (no AVX512VL): needs an all-ones vector mask because
// vgather consumes/clears its mask as it completes lanes.
4171 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
4172   predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
4173             Matcher::vector_length_in_bytes(n) <= 32);
4174   match(Set dst (LoadVectorGather mem idx));
4175   effect(TEMP dst, TEMP tmp, TEMP mask);
4176   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
4177   ins_encode %{
4178     int vlen_enc = vector_length_encoding(this);
4179     BasicType elem_bt = Matcher::vector_element_basic_type(this);
4180     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
       // vpcmpeqd of a register with itself yields all-ones: gather every lane.
4181     __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4182     __ lea($tmp$$Register, $mem$$Address);
4183     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
4184   %}
4185   ins_pipe( pipe_slow );
4186 %}
4187
4188
// EVEX gather (AVX512VL or full 512-bit vectors): uses an opmask register.
// kxnorwl of a register with itself yields an all-ones mask.
4189 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
4190   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4191             !is_subword_type(Matcher::vector_element_basic_type(n)));
4192   match(Set dst (LoadVectorGather mem idx));
4193   effect(TEMP dst, TEMP tmp, TEMP ktmp);
4194   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %}
4195   ins_encode %{
4196     int vlen_enc = vector_length_encoding(this);
4197     BasicType elem_bt = Matcher::vector_element_basic_type(this);
4198     __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
4199     __ lea($tmp$$Register, $mem$$Address);
4200     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
4201   %}
4202   ins_pipe( pipe_slow );
4203 %}
4204
4205 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
4206   predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
4207             !is_subword_type(Matcher::vector_element_basic_type(n)));
4208   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
4209   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
4210   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %}
4211   ins_encode %{
4212     assert(UseAVX > 2, "sanity");
4213     int vlen_enc = vector_length_encoding(this);
4214     BasicType elem_bt = Matcher::vector_element_basic_type(this);
4215     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
4216     // Note: Since gather instruction partially updates the opmask register used
4217     // for predication hence moving mask operand to a temporary.
// NOTE(review): the first lines below are the tail of the preceding EVEX
// masked-gather instruct whose header lies above this chunk: the mask is
// copied to a temporary (the gather partially clears its opmask), dst is
// zeroed, the address is materialized and the hardware gather is issued.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Sub-word (byte/short) gather, total vector size <= 8 bytes, constant-zero
// index offset. Gathered in one 8-byte chunk via a scalar helper.
instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Sub-word gather, vector larger than 8 bytes: loops over 8-byte sub-chunks,
// advancing a copy of idx_base so the incoming register is preserved.
instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp,
                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Same as vgather_subwordLE8B but with a runtime (register) index offset.
instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP tmp, TEMP rtmp, KILL cr);
  format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Same as vgather_subwordGT8B but with a runtime (register) index offset.
instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp,
                                 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


#ifdef _LP64
// Masked sub-word gather (AVX512BW): the kReg mask is transferred to a GPR
// and consumed bit-by-bit by the scalar gather helper.
instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) &&
Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // mask_idx tracks the next mask bit consumed by the helper; start at 0.
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    // Move the opmask into a GPR so it can be tested bit-by-bit.
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked sub-word gather (AVX512BW), vector larger than 8 bytes.
instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    // Copy idx_base so the incoming register survives the gather loop.
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked sub-word gather (AVX512BW), <= 8 bytes, runtime index offset.
instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register,
                               $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked sub-word gather (AVX512BW), > 8 bytes, runtime index offset.
instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp,
                                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorq($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ kmovql($rtmp2$$Register, $mask$$KRegister);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX2 fallback for masked sub-word gather: the vector mask is converted to a
// scalar bit-mask via vpmovmskb; for shorts, PEXT with 0x55555555 compresses
// the per-byte bits down to one bit per 16-bit element.
instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    // mask_idx doubles as the running mask-bit cursor; reset it before use.
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register,
                               $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp,
                                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // Note: the leading xorl of mask_idx that the other avx2 variants omit was
    // redundant here (mask_idx is unconditionally re-zeroed below before its
    // first real use, and overwritten by movl on the T_SHORT path) — removed.
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  // Format fixed: the instruct uses the allocated $ktmp, not a hard-coded k2.
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, hence moving the mask operand to a temporary.
4491 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4492 __ lea($tmp$$Register, $mem$$Address); 4493 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4494 %} 4495 ins_pipe( pipe_slow ); 4496 %} 4497 4498 // ====================REPLICATE======================================= 4499 4500 // Replicate byte scalar to be vector 4501 instruct vReplB_reg(vec dst, rRegI src) %{ 4502 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4503 match(Set dst (Replicate src)); 4504 format %{ "replicateB $dst,$src" %} 4505 ins_encode %{ 4506 uint vlen = Matcher::vector_length(this); 4507 if (UseAVX >= 2) { 4508 int vlen_enc = vector_length_encoding(this); 4509 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4510 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4511 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4512 } else { 4513 __ movdl($dst$$XMMRegister, $src$$Register); 4514 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4515 } 4516 } else { 4517 assert(UseAVX < 2, ""); 4518 __ movdl($dst$$XMMRegister, $src$$Register); 4519 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4520 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4521 if (vlen >= 16) { 4522 assert(vlen == 16, ""); 4523 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4524 } 4525 } 4526 %} 4527 ins_pipe( pipe_slow ); 4528 %} 4529 4530 instruct ReplB_mem(vec dst, memory mem) %{ 4531 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4532 match(Set dst (Replicate (LoadB mem))); 4533 format %{ "replicateB $dst,$mem" %} 4534 ins_encode %{ 4535 int vlen_enc = vector_length_encoding(this); 4536 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4537 %} 4538 ins_pipe( pipe_slow ); 4539 %} 4540 4541 // ====================ReplicateS======================================= 4542 4543 instruct 
vReplS_reg(vec dst, rRegI src) %{ 4544 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4545 match(Set dst (Replicate src)); 4546 format %{ "replicateS $dst,$src" %} 4547 ins_encode %{ 4548 uint vlen = Matcher::vector_length(this); 4549 int vlen_enc = vector_length_encoding(this); 4550 if (UseAVX >= 2) { 4551 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4552 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4553 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4554 } else { 4555 __ movdl($dst$$XMMRegister, $src$$Register); 4556 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4557 } 4558 } else { 4559 assert(UseAVX < 2, ""); 4560 __ movdl($dst$$XMMRegister, $src$$Register); 4561 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4562 if (vlen >= 8) { 4563 assert(vlen == 8, ""); 4564 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4565 } 4566 } 4567 %} 4568 ins_pipe( pipe_slow ); 4569 %} 4570 4571 #ifdef _LP64 4572 instruct ReplHF_imm(vec dst, immH con, rRegI rtmp) %{ 4573 match(Set dst (Replicate con)); 4574 effect(TEMP rtmp); 4575 format %{ "replicateHF $dst, $con \t! using $rtmp as TEMP" %} 4576 ins_encode %{ 4577 int vlen_enc = vector_length_encoding(this); 4578 BasicType bt = Matcher::vector_element_basic_type(this); 4579 assert(VM_Version::supports_avx512_fp16() && bt == T_SHORT, ""); 4580 __ movl($rtmp$$Register, $con$$constant); 4581 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4582 %} 4583 ins_pipe( pipe_slow ); 4584 %} 4585 4586 instruct ReplHF_reg(vec dst, regF src, rRegI rtmp) %{ 4587 predicate(VM_Version::supports_avx512_fp16() && Matcher::vector_element_basic_type(n) == T_SHORT); 4588 match(Set dst (Replicate src)); 4589 effect(TEMP rtmp); 4590 format %{ "replicateHF $dst, $src \t! 
using $rtmp as TEMP" %} 4591 ins_encode %{ 4592 int vlen_enc = vector_length_encoding(this); 4593 __ vmovw($rtmp$$Register, $src$$XMMRegister); 4594 __ evpbroadcastw($dst$$XMMRegister, $rtmp$$Register, vlen_enc); 4595 %} 4596 ins_pipe( pipe_slow ); 4597 %} 4598 #endif 4599 4600 instruct ReplS_mem(vec dst, memory mem) %{ 4601 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4602 match(Set dst (Replicate (LoadS mem))); 4603 format %{ "replicateS $dst,$mem" %} 4604 ins_encode %{ 4605 int vlen_enc = vector_length_encoding(this); 4606 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4607 %} 4608 ins_pipe( pipe_slow ); 4609 %} 4610 4611 // ====================ReplicateI======================================= 4612 4613 instruct ReplI_reg(vec dst, rRegI src) %{ 4614 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4615 match(Set dst (Replicate src)); 4616 format %{ "replicateI $dst,$src" %} 4617 ins_encode %{ 4618 uint vlen = Matcher::vector_length(this); 4619 int vlen_enc = vector_length_encoding(this); 4620 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4621 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4622 } else if (VM_Version::supports_avx2()) { 4623 __ movdl($dst$$XMMRegister, $src$$Register); 4624 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4625 } else { 4626 __ movdl($dst$$XMMRegister, $src$$Register); 4627 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4628 } 4629 %} 4630 ins_pipe( pipe_slow ); 4631 %} 4632 4633 instruct ReplI_mem(vec dst, memory mem) %{ 4634 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4635 match(Set dst (Replicate (LoadI mem))); 4636 format %{ "replicateI $dst,$mem" %} 4637 ins_encode %{ 4638 int vlen_enc = vector_length_encoding(this); 4639 if (VM_Version::supports_avx2()) { 4640 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4641 } else if (VM_Version::supports_avx()) { 4642 __ 
vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate an integral immediate by loading a pre-built vector constant
// from the constant table.
instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 16) /
                                           type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

#ifdef _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// Replicate long (8 byte) scalar to be vector
// On 32-bit the long lives in a register pair; both halves are combined in
// the xmm register before broadcasting.
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      int vlen_enc = Assembler::AVX_256bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
  predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    if (VM_Version::supports_avx512vl()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
    } else {
      int vlen_enc = Assembler::AVX_512bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float scalar immediate to be vector by loading from const table.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant,
                                           VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 4));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double loaded from memory. vbroadcastsd from memory needs a
// 256/512-bit destination; for a 128-bit vector movddup does the splat.
instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    // With SSE3 a single table copy suffices (movddup splats it); otherwise
    // the table entry holds two copies.
    InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 1 : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate double zero via self-xor; EVEX without AVX512VL/DQ needs the
// EVEX-encoded vpxor for extended registers.
instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

// Insert an integral scalar into a <=128-bit vector at constant index $idx.
// $dst is read-modify-write.
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Insert an integral scalar into a 256-bit vector: extract the 128-bit lane
// holding the target element, insert into it, then put the lane back.
instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // x_idx = element index within the 128-bit lane, y_idx = lane index.
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Insert an integral scalar into a 512-bit vector; same lane-extract /
// insert / lane-reinsert scheme with four 128-bit lanes.
instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Insert a long into a 2-element vector (pinsrq operates on the whole xmm).
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Insert a long into a 4-element (256-bit) vector via lane extract/reinsert.
instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Insert a long into an 8-element (512-bit) vector via 128-bit lane
// extract / pinsrq / lane reinsert.
instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// Insert a float into a <=128-bit vector; insertps encodes the destination
// slot in bits 5:4 of its immediate, hence x_idx << 4.
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

// Insert a float into a 256/512-bit vector: extract the 128-bit lane holding
// the slot, insertps into it, put the lane back.
instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Insert a double into a 2-element vector: move the double's bits through a
// GPR so pinsrq can place them.
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// Insert a double into a 4-element (256-bit) vector via GPR + lane
// extract/reinsert.
instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

// Insert a double into an 8-element (512-bit) vector.
// Note: idx is immU8 for consistency with the other insert rules.
instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst
        (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
// Long reduction without AVX512DQ; legVec operands keep the allocator to the
// registers usable by the pre-AVX512 encodings.
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Long reduction with AVX512DQ: plain vec operands allow the full register file.
instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================

// Strictly-ordered float add/mul reduction, <= 4 elements; $dst carries the
// running accumulator (note the match uses dst as both input and output).
instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Strictly-ordered float add/mul reduction, 8 elements.
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Strictly-ordered float add/mul reduction, 16 elements (512-bit src).
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

// Strictly-ordered double add/mul reduction, 2 elements; $dst is the accumulator.
instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Strictly-ordered double add/mul reduction, 4 elements.
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Strictly-ordered double add/mul reduction, 8 elements (512-bit src).
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

#ifdef _LP64
// Byte reduction without AVX512BW. Note: no MulReductionVI match here; byte
// multiply reduction is handled by the dedicated mul_reduction rules below.
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Byte reduction with AVX512BW: plain vec operands allow the full register file.
instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

// Byte multiply reduction, vectors up to 32 elements.
instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Byte multiply reduction, 64-element (512-bit) vectors; legVec constrains
// the temporaries for the wider encodings.
instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min Reduction
// 2-element float min/max reduction with an identity first input (+Inf for
// min, -Inf for max, enforced by the predicate).
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Float min/max reduction (>= 4 elements) with an identity first input.
instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 2-element float min/max reduction accumulating into $dst (dst is both the
// running value and the result; reduceFloatMinMax is called with merge=true).
instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
                               legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// Float min/max reduction (>= 4 elements) accumulating into $dst.
instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
                              legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


//--------------------Min Double Reduction --------------------
// 2-element double min/max reduction with an identity first input (+Inf for
// min, -Inf for max, enforced by the predicate).
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
                            legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                            rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double min/max reduction (>= 4 elements) with an identity first input.
instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
                           legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                           rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}


// 2-element double min/max reduction accumulating into $dst.
instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Double min/max reduction (>= 4 elements) accumulating into $dst.
// (Definition continues beyond this chunk.)
instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
5721 ins_encode %{ 5722 assert(UseAVX > 0, "sanity"); 5723 5724 int opcode = this->ideal_Opcode(); 5725 int vlen = Matcher::vector_length(this, $src); 5726 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5727 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5728 %} 5729 ins_pipe( pipe_slow ); 5730 %} 5731 5732 // ====================VECTOR ARITHMETIC======================================= 5733 5734 // --------------------------------- ADD -------------------------------------- 5735 5736 // Bytes vector add 5737 instruct vaddB(vec dst, vec src) %{ 5738 predicate(UseAVX == 0); 5739 match(Set dst (AddVB dst src)); 5740 format %{ "paddb $dst,$src\t! add packedB" %} 5741 ins_encode %{ 5742 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5743 %} 5744 ins_pipe( pipe_slow ); 5745 %} 5746 5747 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5748 predicate(UseAVX > 0); 5749 match(Set dst (AddVB src1 src2)); 5750 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5751 ins_encode %{ 5752 int vlen_enc = vector_length_encoding(this); 5753 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5754 %} 5755 ins_pipe( pipe_slow ); 5756 %} 5757 5758 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5759 predicate((UseAVX > 0) && 5760 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5761 match(Set dst (AddVB src (LoadVector mem))); 5762 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5763 ins_encode %{ 5764 int vlen_enc = vector_length_encoding(this); 5765 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 // Shorts/Chars vector add 5771 instruct vaddS(vec dst, vec src) %{ 5772 predicate(UseAVX == 0); 5773 match(Set dst (AddVS dst src)); 5774 format %{ "paddw $dst,$src\t! 
add packedS" %} 5775 ins_encode %{ 5776 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5782 predicate(UseAVX > 0); 5783 match(Set dst (AddVS src1 src2)); 5784 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5785 ins_encode %{ 5786 int vlen_enc = vector_length_encoding(this); 5787 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5793 predicate((UseAVX > 0) && 5794 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5795 match(Set dst (AddVS src (LoadVector mem))); 5796 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5797 ins_encode %{ 5798 int vlen_enc = vector_length_encoding(this); 5799 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5800 %} 5801 ins_pipe( pipe_slow ); 5802 %} 5803 5804 // Integers vector add 5805 instruct vaddI(vec dst, vec src) %{ 5806 predicate(UseAVX == 0); 5807 match(Set dst (AddVI dst src)); 5808 format %{ "paddd $dst,$src\t! add packedI" %} 5809 ins_encode %{ 5810 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5811 %} 5812 ins_pipe( pipe_slow ); 5813 %} 5814 5815 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5816 predicate(UseAVX > 0); 5817 match(Set dst (AddVI src1 src2)); 5818 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5819 ins_encode %{ 5820 int vlen_enc = vector_length_encoding(this); 5821 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5822 %} 5823 ins_pipe( pipe_slow ); 5824 %} 5825 5826 5827 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5828 predicate((UseAVX > 0) && 5829 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5830 match(Set dst (AddVI src (LoadVector mem))); 5831 format %{ "vpaddd $dst,$src,$mem\t! 
add packedI" %} 5832 ins_encode %{ 5833 int vlen_enc = vector_length_encoding(this); 5834 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5835 %} 5836 ins_pipe( pipe_slow ); 5837 %} 5838 5839 // Longs vector add 5840 instruct vaddL(vec dst, vec src) %{ 5841 predicate(UseAVX == 0); 5842 match(Set dst (AddVL dst src)); 5843 format %{ "paddq $dst,$src\t! add packedL" %} 5844 ins_encode %{ 5845 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5846 %} 5847 ins_pipe( pipe_slow ); 5848 %} 5849 5850 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5851 predicate(UseAVX > 0); 5852 match(Set dst (AddVL src1 src2)); 5853 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5854 ins_encode %{ 5855 int vlen_enc = vector_length_encoding(this); 5856 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5857 %} 5858 ins_pipe( pipe_slow ); 5859 %} 5860 5861 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5862 predicate((UseAVX > 0) && 5863 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5864 match(Set dst (AddVL src (LoadVector mem))); 5865 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5866 ins_encode %{ 5867 int vlen_enc = vector_length_encoding(this); 5868 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5869 %} 5870 ins_pipe( pipe_slow ); 5871 %} 5872 5873 // Floats vector add 5874 instruct vaddF(vec dst, vec src) %{ 5875 predicate(UseAVX == 0); 5876 match(Set dst (AddVF dst src)); 5877 format %{ "addps $dst,$src\t! add packedF" %} 5878 ins_encode %{ 5879 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5880 %} 5881 ins_pipe( pipe_slow ); 5882 %} 5883 5884 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5885 predicate(UseAVX > 0); 5886 match(Set dst (AddVF src1 src2)); 5887 format %{ "vaddps $dst,$src1,$src2\t! 
add packedF" %} 5888 ins_encode %{ 5889 int vlen_enc = vector_length_encoding(this); 5890 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5891 %} 5892 ins_pipe( pipe_slow ); 5893 %} 5894 5895 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5896 predicate((UseAVX > 0) && 5897 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5898 match(Set dst (AddVF src (LoadVector mem))); 5899 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5900 ins_encode %{ 5901 int vlen_enc = vector_length_encoding(this); 5902 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5903 %} 5904 ins_pipe( pipe_slow ); 5905 %} 5906 5907 // Doubles vector add 5908 instruct vaddD(vec dst, vec src) %{ 5909 predicate(UseAVX == 0); 5910 match(Set dst (AddVD dst src)); 5911 format %{ "addpd $dst,$src\t! add packedD" %} 5912 ins_encode %{ 5913 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5914 %} 5915 ins_pipe( pipe_slow ); 5916 %} 5917 5918 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5919 predicate(UseAVX > 0); 5920 match(Set dst (AddVD src1 src2)); 5921 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5922 ins_encode %{ 5923 int vlen_enc = vector_length_encoding(this); 5924 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5925 %} 5926 ins_pipe( pipe_slow ); 5927 %} 5928 5929 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5930 predicate((UseAVX > 0) && 5931 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5932 match(Set dst (AddVD src (LoadVector mem))); 5933 format %{ "vaddpd $dst,$src,$mem\t! 
add packedD" %} 5934 ins_encode %{ 5935 int vlen_enc = vector_length_encoding(this); 5936 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5937 %} 5938 ins_pipe( pipe_slow ); 5939 %} 5940 5941 // --------------------------------- SUB -------------------------------------- 5942 5943 // Bytes vector sub 5944 instruct vsubB(vec dst, vec src) %{ 5945 predicate(UseAVX == 0); 5946 match(Set dst (SubVB dst src)); 5947 format %{ "psubb $dst,$src\t! sub packedB" %} 5948 ins_encode %{ 5949 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5950 %} 5951 ins_pipe( pipe_slow ); 5952 %} 5953 5954 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5955 predicate(UseAVX > 0); 5956 match(Set dst (SubVB src1 src2)); 5957 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5958 ins_encode %{ 5959 int vlen_enc = vector_length_encoding(this); 5960 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5961 %} 5962 ins_pipe( pipe_slow ); 5963 %} 5964 5965 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5966 predicate((UseAVX > 0) && 5967 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5968 match(Set dst (SubVB src (LoadVector mem))); 5969 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5970 ins_encode %{ 5971 int vlen_enc = vector_length_encoding(this); 5972 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5973 %} 5974 ins_pipe( pipe_slow ); 5975 %} 5976 5977 // Shorts/Chars vector sub 5978 instruct vsubS(vec dst, vec src) %{ 5979 predicate(UseAVX == 0); 5980 match(Set dst (SubVS dst src)); 5981 format %{ "psubw $dst,$src\t! sub packedS" %} 5982 ins_encode %{ 5983 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5984 %} 5985 ins_pipe( pipe_slow ); 5986 %} 5987 5988 5989 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5990 predicate(UseAVX > 0); 5991 match(Set dst (SubVS src1 src2)); 5992 format %{ "vpsubw $dst,$src1,$src2\t! 
sub packedS" %} 5993 ins_encode %{ 5994 int vlen_enc = vector_length_encoding(this); 5995 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5996 %} 5997 ins_pipe( pipe_slow ); 5998 %} 5999 6000 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 6001 predicate((UseAVX > 0) && 6002 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6003 match(Set dst (SubVS src (LoadVector mem))); 6004 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 6005 ins_encode %{ 6006 int vlen_enc = vector_length_encoding(this); 6007 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6008 %} 6009 ins_pipe( pipe_slow ); 6010 %} 6011 6012 // Integers vector sub 6013 instruct vsubI(vec dst, vec src) %{ 6014 predicate(UseAVX == 0); 6015 match(Set dst (SubVI dst src)); 6016 format %{ "psubd $dst,$src\t! sub packedI" %} 6017 ins_encode %{ 6018 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 6019 %} 6020 ins_pipe( pipe_slow ); 6021 %} 6022 6023 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 6024 predicate(UseAVX > 0); 6025 match(Set dst (SubVI src1 src2)); 6026 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 6027 ins_encode %{ 6028 int vlen_enc = vector_length_encoding(this); 6029 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6030 %} 6031 ins_pipe( pipe_slow ); 6032 %} 6033 6034 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 6035 predicate((UseAVX > 0) && 6036 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6037 match(Set dst (SubVI src (LoadVector mem))); 6038 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 6039 ins_encode %{ 6040 int vlen_enc = vector_length_encoding(this); 6041 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6042 %} 6043 ins_pipe( pipe_slow ); 6044 %} 6045 6046 // Longs vector sub 6047 instruct vsubL(vec dst, vec src) %{ 6048 predicate(UseAVX == 0); 6049 match(Set dst (SubVL dst src)); 6050 format %{ "psubq $dst,$src\t! 
sub packedL" %} 6051 ins_encode %{ 6052 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6053 %} 6054 ins_pipe( pipe_slow ); 6055 %} 6056 6057 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 6058 predicate(UseAVX > 0); 6059 match(Set dst (SubVL src1 src2)); 6060 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 6061 ins_encode %{ 6062 int vlen_enc = vector_length_encoding(this); 6063 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6064 %} 6065 ins_pipe( pipe_slow ); 6066 %} 6067 6068 6069 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 6070 predicate((UseAVX > 0) && 6071 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6072 match(Set dst (SubVL src (LoadVector mem))); 6073 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 6074 ins_encode %{ 6075 int vlen_enc = vector_length_encoding(this); 6076 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6077 %} 6078 ins_pipe( pipe_slow ); 6079 %} 6080 6081 // Floats vector sub 6082 instruct vsubF(vec dst, vec src) %{ 6083 predicate(UseAVX == 0); 6084 match(Set dst (SubVF dst src)); 6085 format %{ "subps $dst,$src\t! sub packedF" %} 6086 ins_encode %{ 6087 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6088 %} 6089 ins_pipe( pipe_slow ); 6090 %} 6091 6092 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 6093 predicate(UseAVX > 0); 6094 match(Set dst (SubVF src1 src2)); 6095 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 6096 ins_encode %{ 6097 int vlen_enc = vector_length_encoding(this); 6098 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6099 %} 6100 ins_pipe( pipe_slow ); 6101 %} 6102 6103 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6104 predicate((UseAVX > 0) && 6105 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6106 match(Set dst (SubVF src (LoadVector mem))); 6107 format %{ "vsubps $dst,$src,$mem\t! 
sub packedF" %} 6108 ins_encode %{ 6109 int vlen_enc = vector_length_encoding(this); 6110 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6111 %} 6112 ins_pipe( pipe_slow ); 6113 %} 6114 6115 // Doubles vector sub 6116 instruct vsubD(vec dst, vec src) %{ 6117 predicate(UseAVX == 0); 6118 match(Set dst (SubVD dst src)); 6119 format %{ "subpd $dst,$src\t! sub packedD" %} 6120 ins_encode %{ 6121 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6122 %} 6123 ins_pipe( pipe_slow ); 6124 %} 6125 6126 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6127 predicate(UseAVX > 0); 6128 match(Set dst (SubVD src1 src2)); 6129 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6130 ins_encode %{ 6131 int vlen_enc = vector_length_encoding(this); 6132 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6133 %} 6134 ins_pipe( pipe_slow ); 6135 %} 6136 6137 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6138 predicate((UseAVX > 0) && 6139 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6140 match(Set dst (SubVD src (LoadVector mem))); 6141 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6142 ins_encode %{ 6143 int vlen_enc = vector_length_encoding(this); 6144 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6145 %} 6146 ins_pipe( pipe_slow ); 6147 %} 6148 6149 // --------------------------------- MUL -------------------------------------- 6150 6151 // Byte vector mul 6152 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6153 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6154 match(Set dst (MulVB src1 src2)); 6155 effect(TEMP dst, TEMP xtmp); 6156 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6157 ins_encode %{ 6158 assert(UseSSE > 3, "required"); 6159 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6160 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6161 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6162 __ psllw($dst$$XMMRegister, 8); 6163 __ psrlw($dst$$XMMRegister, 8); 6164 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6165 %} 6166 ins_pipe( pipe_slow ); 6167 %} 6168 6169 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6170 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6171 match(Set dst (MulVB src1 src2)); 6172 effect(TEMP dst, TEMP xtmp); 6173 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6174 ins_encode %{ 6175 assert(UseSSE > 3, "required"); 6176 // Odd-index elements 6177 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6178 __ psrlw($dst$$XMMRegister, 8); 6179 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6180 __ psrlw($xtmp$$XMMRegister, 8); 6181 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6182 __ psllw($dst$$XMMRegister, 8); 6183 // Even-index elements 6184 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6185 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6186 __ psllw($xtmp$$XMMRegister, 8); 6187 __ psrlw($xtmp$$XMMRegister, 8); 6188 // Combine 6189 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6190 %} 6191 ins_pipe( pipe_slow ); 6192 %} 6193 6194 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6195 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6196 match(Set dst (MulVB src1 src2)); 6197 effect(TEMP xtmp1, TEMP xtmp2); 6198 format %{ "vmulVB $dst, $src1, $src2\t! 
using $xtmp1, $xtmp2 as TEMP" %} 6199 ins_encode %{ 6200 int vlen_enc = vector_length_encoding(this); 6201 // Odd-index elements 6202 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6203 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6204 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6205 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6206 // Even-index elements 6207 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6208 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6209 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6210 // Combine 6211 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6212 %} 6213 ins_pipe( pipe_slow ); 6214 %} 6215 6216 // Shorts/Chars vector mul 6217 instruct vmulS(vec dst, vec src) %{ 6218 predicate(UseAVX == 0); 6219 match(Set dst (MulVS dst src)); 6220 format %{ "pmullw $dst,$src\t! mul packedS" %} 6221 ins_encode %{ 6222 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6223 %} 6224 ins_pipe( pipe_slow ); 6225 %} 6226 6227 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6228 predicate(UseAVX > 0); 6229 match(Set dst (MulVS src1 src2)); 6230 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6231 ins_encode %{ 6232 int vlen_enc = vector_length_encoding(this); 6233 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6234 %} 6235 ins_pipe( pipe_slow ); 6236 %} 6237 6238 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6239 predicate((UseAVX > 0) && 6240 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6241 match(Set dst (MulVS src (LoadVector mem))); 6242 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6243 ins_encode %{ 6244 int vlen_enc = vector_length_encoding(this); 6245 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6246 %} 6247 ins_pipe( pipe_slow ); 6248 %} 6249 6250 // Integers vector mul 6251 instruct vmulI(vec dst, vec src) %{ 6252 predicate(UseAVX == 0); 6253 match(Set dst (MulVI dst src)); 6254 format %{ "pmulld $dst,$src\t! mul packedI" %} 6255 ins_encode %{ 6256 assert(UseSSE > 3, "required"); 6257 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6258 %} 6259 ins_pipe( pipe_slow ); 6260 %} 6261 6262 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6263 predicate(UseAVX > 0); 6264 match(Set dst (MulVI src1 src2)); 6265 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6266 ins_encode %{ 6267 int vlen_enc = vector_length_encoding(this); 6268 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6274 predicate((UseAVX > 0) && 6275 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6276 match(Set dst (MulVI src (LoadVector mem))); 6277 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6278 ins_encode %{ 6279 int vlen_enc = vector_length_encoding(this); 6280 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6281 %} 6282 ins_pipe( pipe_slow ); 6283 %} 6284 6285 // Longs vector mul 6286 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6287 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6288 VM_Version::supports_avx512dq()) || 6289 VM_Version::supports_avx512vldq()); 6290 match(Set dst (MulVL src1 src2)); 6291 ins_cost(500); 6292 format %{ "evpmullq $dst,$src1,$src2\t! 
mul packedL" %} 6293 ins_encode %{ 6294 assert(UseAVX > 2, "required"); 6295 int vlen_enc = vector_length_encoding(this); 6296 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6297 %} 6298 ins_pipe( pipe_slow ); 6299 %} 6300 6301 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6302 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6303 VM_Version::supports_avx512dq()) || 6304 (Matcher::vector_length_in_bytes(n) > 8 && 6305 VM_Version::supports_avx512vldq())); 6306 match(Set dst (MulVL src (LoadVector mem))); 6307 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6308 ins_cost(500); 6309 ins_encode %{ 6310 assert(UseAVX > 2, "required"); 6311 int vlen_enc = vector_length_encoding(this); 6312 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6313 %} 6314 ins_pipe( pipe_slow ); 6315 %} 6316 6317 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6318 predicate(UseAVX == 0); 6319 match(Set dst (MulVL src1 src2)); 6320 ins_cost(500); 6321 effect(TEMP dst, TEMP xtmp); 6322 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6323 ins_encode %{ 6324 assert(VM_Version::supports_sse4_1(), "required"); 6325 // Get the lo-hi products, only the lower 32 bits is in concerns 6326 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 6327 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 6328 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 6329 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 6330 __ psllq($dst$$XMMRegister, 32); 6331 // Get the lo-lo products 6332 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6333 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 6334 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 6335 %} 6336 ins_pipe( pipe_slow ); 6337 %} 6338 6339 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6340 predicate(UseAVX > 0 && 6341 ((Matcher::vector_length_in_bytes(n) == 64 && 6342 !VM_Version::supports_avx512dq()) || 6343 (Matcher::vector_length_in_bytes(n) < 64 && 6344 !VM_Version::supports_avx512vldq()))); 6345 match(Set dst (MulVL src1 src2)); 6346 effect(TEMP xtmp1, TEMP xtmp2); 6347 ins_cost(500); 6348 format %{ "vmulVL $dst, $src1, $src2\t! 
using $xtmp1, $xtmp2 as TEMP" %} 6349 ins_encode %{ 6350 int vlen_enc = vector_length_encoding(this); 6351 // Get the lo-hi products, only the lower 32 bits is in concerns 6352 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 6353 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6354 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 6355 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 6356 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 6357 // Get the lo-lo products 6358 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6359 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6360 %} 6361 ins_pipe( pipe_slow ); 6362 %} 6363 6364 instruct vmuludq_reg(vec dst, vec src1, vec src2) %{ 6365 predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs()); 6366 match(Set dst (MulVL src1 src2)); 6367 ins_cost(100); 6368 format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %} 6369 ins_encode %{ 6370 int vlen_enc = vector_length_encoding(this); 6371 __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6372 %} 6373 ins_pipe( pipe_slow ); 6374 %} 6375 6376 instruct vmuldq_reg(vec dst, vec src1, vec src2) %{ 6377 predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs()); 6378 match(Set dst (MulVL src1 src2)); 6379 ins_cost(100); 6380 format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %} 6381 ins_encode %{ 6382 int vlen_enc = vector_length_encoding(this); 6383 __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6384 %} 6385 ins_pipe( pipe_slow ); 6386 %} 6387 6388 // Floats vector mul 6389 instruct vmulF(vec dst, vec src) %{ 6390 predicate(UseAVX == 0); 6391 match(Set dst (MulVF dst src)); 6392 format %{ "mulps $dst,$src\t! 
mul packedF" %} 6393 ins_encode %{ 6394 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 6395 %} 6396 ins_pipe( pipe_slow ); 6397 %} 6398 6399 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 6400 predicate(UseAVX > 0); 6401 match(Set dst (MulVF src1 src2)); 6402 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 6403 ins_encode %{ 6404 int vlen_enc = vector_length_encoding(this); 6405 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6406 %} 6407 ins_pipe( pipe_slow ); 6408 %} 6409 6410 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6411 predicate((UseAVX > 0) && 6412 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6413 match(Set dst (MulVF src (LoadVector mem))); 6414 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6415 ins_encode %{ 6416 int vlen_enc = vector_length_encoding(this); 6417 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6418 %} 6419 ins_pipe( pipe_slow ); 6420 %} 6421 6422 // Doubles vector mul 6423 instruct vmulD(vec dst, vec src) %{ 6424 predicate(UseAVX == 0); 6425 match(Set dst (MulVD dst src)); 6426 format %{ "mulpd $dst,$src\t! mul packedD" %} 6427 ins_encode %{ 6428 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6429 %} 6430 ins_pipe( pipe_slow ); 6431 %} 6432 6433 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6434 predicate(UseAVX > 0); 6435 match(Set dst (MulVD src1 src2)); 6436 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6437 ins_encode %{ 6438 int vlen_enc = vector_length_encoding(this); 6439 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6440 %} 6441 ins_pipe( pipe_slow ); 6442 %} 6443 6444 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6445 predicate((UseAVX > 0) && 6446 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6447 match(Set dst (MulVD src (LoadVector mem))); 6448 format %{ "vmulpd $dst,$src,$mem\t! 
mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ MinMax ---------------------------------------

// Byte, Short, Int vector Min/Max
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector Min/Max
instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV src dst));
  effect(TEMP dst, TEMP tmp);
  format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG &&
            UseAVX > 0 && !VM_Version::supports_avx512vl());
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  effect(TEMP dst);
  format %{ "vector_minmaxL $dst,$src1,$src2\t! " %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) &&
            Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmaxL $dst,$src1,src2\t! " %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(elem_bt == T_LONG, "sanity");

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Float/Double vector Min/Max
instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 32 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE
            UseAVX > 0);
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vminmax_fp(opcode, elem_bt,
                  $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                  $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
  match(Set dst (MinV a b));
  match(Set dst (MaxV a b));
  effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
  format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ evminmax_fp(opcode, elem_bt,
                   $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
                   $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Unsigned vector Min/Max ----------------------

instruct vector_uminmax_reg(vec dst, vec a, vec b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem(vec dst, vec a, memory b) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG);
  match(Set dst (UMinV a (LoadVector b)));
  match(Set dst (UMaxV a (LoadVector b)));
  format %{ "vector_uminmax $dst,$a,$b\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(is_integral_type(elem_bt), "");
    __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Unsigned long min/max without AVX512VL: lowered via vpuminmaxq with two temps.
instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (UMinV a b));
  match(Set dst (UMaxV a b));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst src2) mask));
  match(Set dst (UMaxV (Binary dst src2) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (UMinV (Binary dst (LoadVector src2)) mask));
  match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is 0x11100100 = 0xe4
// ---------------------------------------

#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // 0x7FFFFFFF masks off the float sign bit; vpternlogd 0xE4 selects
    // magnitude from dst and sign from src (see truth table above).
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    // 0x7FFFFFFFFFFFFFFF masks off the double sign bit; same 0xE4 selector.
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    // Widen bytes to words, shift as words, mask and pack back to bytes.
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    // 0xD8 permute fixes the lane interleaving left by 256-bit vpackuswb.
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces incorrect Java result
// for negative data because java code convert short value into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector left constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      // No psraq before AVX-512: emulate arithmetic shift via
      // logical shift then sign-fixup with xor/sub against a shifted
      // sign-mask (two's-complement identity: (x ^ m) - m).
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Shift -----------------------------
// Byte variable shift
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB
src shift)); 7325 match(Set dst ( RShiftVB src shift)); 7326 match(Set dst (URShiftVB src shift)); 7327 effect(TEMP dst, TEMP vtmp); 7328 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7329 ins_encode %{ 7330 assert(UseAVX > 2, "required"); 7331 7332 int opcode = this->ideal_Opcode(); 7333 int vlen_enc = vector_length_encoding(this); 7334 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7335 %} 7336 ins_pipe( pipe_slow ); 7337 %} 7338 7339 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7340 predicate(Matcher::vector_length(n) == 64 && 7341 n->as_ShiftV()->is_var_shift() && 7342 VM_Version::supports_avx512bw()); 7343 match(Set dst ( LShiftVB src shift)); 7344 match(Set dst ( RShiftVB src shift)); 7345 match(Set dst (URShiftVB src shift)); 7346 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7347 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7348 ins_encode %{ 7349 assert(UseAVX > 2, "required"); 7350 7351 int opcode = this->ideal_Opcode(); 7352 int vlen_enc = Assembler::AVX_256bit; 7353 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7354 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7355 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7356 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7357 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7358 %} 7359 ins_pipe( pipe_slow ); 7360 %} 7361 7362 // Short variable shift 7363 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7364 predicate(Matcher::vector_length(n) <= 8 && 7365 n->as_ShiftV()->is_var_shift() && 7366 !VM_Version::supports_avx512bw()); 7367 match(Set dst ( LShiftVS src shift)); 7368 match(Set dst ( RShiftVS src shift)); 
7369 match(Set dst (URShiftVS src shift)); 7370 effect(TEMP dst, TEMP vtmp); 7371 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7372 ins_encode %{ 7373 assert(UseAVX >= 2, "required"); 7374 7375 int opcode = this->ideal_Opcode(); 7376 bool sign = (opcode != Op_URShiftVS); 7377 int vlen_enc = Assembler::AVX_256bit; 7378 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7379 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7380 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7381 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7382 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7383 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7384 %} 7385 ins_pipe( pipe_slow ); 7386 %} 7387 7388 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7389 predicate(Matcher::vector_length(n) == 16 && 7390 n->as_ShiftV()->is_var_shift() && 7391 !VM_Version::supports_avx512bw()); 7392 match(Set dst ( LShiftVS src shift)); 7393 match(Set dst ( RShiftVS src shift)); 7394 match(Set dst (URShiftVS src shift)); 7395 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7396 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7397 ins_encode %{ 7398 assert(UseAVX >= 2, "required"); 7399 7400 int opcode = this->ideal_Opcode(); 7401 bool sign = (opcode != Op_URShiftVS); 7402 int vlen_enc = Assembler::AVX_256bit; 7403 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7404 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7405 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7406 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7407 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7408 7409 // Shift upper half, with result in 
dst using vtmp1 as TEMP 7410 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7411 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7412 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7413 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7414 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7415 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7416 7417 // Merge lower and upper half result into dst 7418 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7419 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7420 %} 7421 ins_pipe( pipe_slow ); 7422 %} 7423 7424 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7425 predicate(n->as_ShiftV()->is_var_shift() && 7426 VM_Version::supports_avx512bw()); 7427 match(Set dst ( LShiftVS src shift)); 7428 match(Set dst ( RShiftVS src shift)); 7429 match(Set dst (URShiftVS src shift)); 7430 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7431 ins_encode %{ 7432 assert(UseAVX > 2, "required"); 7433 7434 int opcode = this->ideal_Opcode(); 7435 int vlen_enc = vector_length_encoding(this); 7436 if (!VM_Version::supports_avx512vl()) { 7437 vlen_enc = Assembler::AVX_512bit; 7438 } 7439 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7440 %} 7441 ins_pipe( pipe_slow ); 7442 %} 7443 7444 //Integer variable shift 7445 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7446 predicate(n->as_ShiftV()->is_var_shift()); 7447 match(Set dst ( LShiftVI src shift)); 7448 match(Set dst ( RShiftVI src shift)); 7449 match(Set dst (URShiftVI src shift)); 7450 format %{ "vector_varshift_int $dst,$src,$shift\t!" 
%} 7451 ins_encode %{ 7452 assert(UseAVX >= 2, "required"); 7453 7454 int opcode = this->ideal_Opcode(); 7455 int vlen_enc = vector_length_encoding(this); 7456 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7457 %} 7458 ins_pipe( pipe_slow ); 7459 %} 7460 7461 //Long variable shift 7462 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7463 predicate(n->as_ShiftV()->is_var_shift()); 7464 match(Set dst ( LShiftVL src shift)); 7465 match(Set dst (URShiftVL src shift)); 7466 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7467 ins_encode %{ 7468 assert(UseAVX >= 2, "required"); 7469 7470 int opcode = this->ideal_Opcode(); 7471 int vlen_enc = vector_length_encoding(this); 7472 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7473 %} 7474 ins_pipe( pipe_slow ); 7475 %} 7476 7477 //Long variable right shift arithmetic 7478 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7479 predicate(Matcher::vector_length(n) <= 4 && 7480 n->as_ShiftV()->is_var_shift() && 7481 UseAVX == 2); 7482 match(Set dst (RShiftVL src shift)); 7483 effect(TEMP dst, TEMP vtmp); 7484 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 7485 ins_encode %{ 7486 int opcode = this->ideal_Opcode(); 7487 int vlen_enc = vector_length_encoding(this); 7488 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7489 $vtmp$$XMMRegister); 7490 %} 7491 ins_pipe( pipe_slow ); 7492 %} 7493 7494 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7495 predicate(n->as_ShiftV()->is_var_shift() && 7496 UseAVX > 2); 7497 match(Set dst (RShiftVL src shift)); 7498 format %{ "vector_varfshift_long $dst,$src,$shift\t!" 
%} 7499 ins_encode %{ 7500 int opcode = this->ideal_Opcode(); 7501 int vlen_enc = vector_length_encoding(this); 7502 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7503 %} 7504 ins_pipe( pipe_slow ); 7505 %} 7506 7507 // --------------------------------- AND -------------------------------------- 7508 7509 instruct vand(vec dst, vec src) %{ 7510 predicate(UseAVX == 0); 7511 match(Set dst (AndV dst src)); 7512 format %{ "pand $dst,$src\t! and vectors" %} 7513 ins_encode %{ 7514 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7515 %} 7516 ins_pipe( pipe_slow ); 7517 %} 7518 7519 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7520 predicate(UseAVX > 0); 7521 match(Set dst (AndV src1 src2)); 7522 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7523 ins_encode %{ 7524 int vlen_enc = vector_length_encoding(this); 7525 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7526 %} 7527 ins_pipe( pipe_slow ); 7528 %} 7529 7530 instruct vand_mem(vec dst, vec src, memory mem) %{ 7531 predicate((UseAVX > 0) && 7532 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7533 match(Set dst (AndV src (LoadVector mem))); 7534 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7535 ins_encode %{ 7536 int vlen_enc = vector_length_encoding(this); 7537 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7538 %} 7539 ins_pipe( pipe_slow ); 7540 %} 7541 7542 // --------------------------------- OR --------------------------------------- 7543 7544 instruct vor(vec dst, vec src) %{ 7545 predicate(UseAVX == 0); 7546 match(Set dst (OrV dst src)); 7547 format %{ "por $dst,$src\t! or vectors" %} 7548 ins_encode %{ 7549 __ por($dst$$XMMRegister, $src$$XMMRegister); 7550 %} 7551 ins_pipe( pipe_slow ); 7552 %} 7553 7554 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7555 predicate(UseAVX > 0); 7556 match(Set dst (OrV src1 src2)); 7557 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 7558 ins_encode %{ 7559 int vlen_enc = vector_length_encoding(this); 7560 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7561 %} 7562 ins_pipe( pipe_slow ); 7563 %} 7564 7565 instruct vor_mem(vec dst, vec src, memory mem) %{ 7566 predicate((UseAVX > 0) && 7567 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7568 match(Set dst (OrV src (LoadVector mem))); 7569 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7570 ins_encode %{ 7571 int vlen_enc = vector_length_encoding(this); 7572 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7573 %} 7574 ins_pipe( pipe_slow ); 7575 %} 7576 7577 // --------------------------------- XOR -------------------------------------- 7578 7579 instruct vxor(vec dst, vec src) %{ 7580 predicate(UseAVX == 0); 7581 match(Set dst (XorV dst src)); 7582 format %{ "pxor $dst,$src\t! xor vectors" %} 7583 ins_encode %{ 7584 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7585 %} 7586 ins_pipe( pipe_slow ); 7587 %} 7588 7589 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7590 predicate(UseAVX > 0); 7591 match(Set dst (XorV src1 src2)); 7592 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7593 ins_encode %{ 7594 int vlen_enc = vector_length_encoding(this); 7595 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7596 %} 7597 ins_pipe( pipe_slow ); 7598 %} 7599 7600 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7601 predicate((UseAVX > 0) && 7602 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7603 match(Set dst (XorV src (LoadVector mem))); 7604 format %{ "vpxor $dst,$src,$mem\t! 
xor vectors" %} 7605 ins_encode %{ 7606 int vlen_enc = vector_length_encoding(this); 7607 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7608 %} 7609 ins_pipe( pipe_slow ); 7610 %} 7611 7612 // --------------------------------- VectorCast -------------------------------------- 7613 7614 instruct vcastBtoX(vec dst, vec src) %{ 7615 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7616 match(Set dst (VectorCastB2X src)); 7617 format %{ "vector_cast_b2x $dst,$src\t!" %} 7618 ins_encode %{ 7619 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7620 int vlen_enc = vector_length_encoding(this); 7621 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7622 %} 7623 ins_pipe( pipe_slow ); 7624 %} 7625 7626 instruct vcastBtoD(legVec dst, legVec src) %{ 7627 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7628 match(Set dst (VectorCastB2X src)); 7629 format %{ "vector_cast_b2x $dst,$src\t!" 
%} 7630 ins_encode %{ 7631 int vlen_enc = vector_length_encoding(this); 7632 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7633 %} 7634 ins_pipe( pipe_slow ); 7635 %} 7636 7637 instruct castStoX(vec dst, vec src) %{ 7638 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7639 Matcher::vector_length(n->in(1)) <= 8 && // src 7640 Matcher::vector_element_basic_type(n) == T_BYTE); 7641 match(Set dst (VectorCastS2X src)); 7642 format %{ "vector_cast_s2x $dst,$src" %} 7643 ins_encode %{ 7644 assert(UseAVX > 0, "required"); 7645 7646 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7647 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7648 %} 7649 ins_pipe( pipe_slow ); 7650 %} 7651 7652 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7653 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7654 Matcher::vector_length(n->in(1)) == 16 && // src 7655 Matcher::vector_element_basic_type(n) == T_BYTE); 7656 effect(TEMP dst, TEMP vtmp); 7657 match(Set dst (VectorCastS2X src)); 7658 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} 7659 ins_encode %{ 7660 assert(UseAVX > 0, "required"); 7661 7662 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7663 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7664 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7665 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7666 %} 7667 ins_pipe( pipe_slow ); 7668 %} 7669 7670 instruct vcastStoX_evex(vec dst, vec src) %{ 7671 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7672 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7673 match(Set dst (VectorCastS2X src)); 7674 format %{ "vector_cast_s2x $dst,$src\t!" 
%} 7675 ins_encode %{ 7676 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7677 int src_vlen_enc = vector_length_encoding(this, $src); 7678 int vlen_enc = vector_length_encoding(this); 7679 switch (to_elem_bt) { 7680 case T_BYTE: 7681 if (!VM_Version::supports_avx512vl()) { 7682 vlen_enc = Assembler::AVX_512bit; 7683 } 7684 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7685 break; 7686 case T_INT: 7687 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7688 break; 7689 case T_FLOAT: 7690 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7691 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7692 break; 7693 case T_LONG: 7694 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7695 break; 7696 case T_DOUBLE: { 7697 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7698 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7699 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7700 break; 7701 } 7702 default: 7703 ShouldNotReachHere(); 7704 } 7705 %} 7706 ins_pipe( pipe_slow ); 7707 %} 7708 7709 instruct castItoX(vec dst, vec src) %{ 7710 predicate(UseAVX <= 2 && 7711 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7712 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7713 match(Set dst (VectorCastI2X src)); 7714 format %{ "vector_cast_i2x $dst,$src" %} 7715 ins_encode %{ 7716 assert(UseAVX > 0, "required"); 7717 7718 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7719 int vlen_enc = vector_length_encoding(this, $src); 7720 7721 if (to_elem_bt == T_BYTE) { 7722 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7723 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7724 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7725 
} else { 7726 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7727 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7728 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7729 } 7730 %} 7731 ins_pipe( pipe_slow ); 7732 %} 7733 7734 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7735 predicate(UseAVX <= 2 && 7736 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7737 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7738 match(Set dst (VectorCastI2X src)); 7739 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} 7740 effect(TEMP dst, TEMP vtmp); 7741 ins_encode %{ 7742 assert(UseAVX > 0, "required"); 7743 7744 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7745 int vlen_enc = vector_length_encoding(this, $src); 7746 7747 if (to_elem_bt == T_BYTE) { 7748 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7749 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7750 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7751 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7752 } else { 7753 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7754 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7755 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7756 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7757 } 7758 %} 7759 ins_pipe( pipe_slow ); 7760 %} 7761 7762 instruct vcastItoX_evex(vec dst, vec src) %{ 7763 predicate(UseAVX > 2 || 7764 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7765 match(Set dst (VectorCastI2X src)); 7766 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7767 ins_encode %{ 7768 assert(UseAVX > 0, "required"); 7769 7770 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7771 int src_vlen_enc = vector_length_encoding(this, $src); 7772 int dst_vlen_enc = vector_length_encoding(this); 7773 switch (dst_elem_bt) { 7774 case T_BYTE: 7775 if (!VM_Version::supports_avx512vl()) { 7776 src_vlen_enc = Assembler::AVX_512bit; 7777 } 7778 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7779 break; 7780 case T_SHORT: 7781 if (!VM_Version::supports_avx512vl()) { 7782 src_vlen_enc = Assembler::AVX_512bit; 7783 } 7784 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7785 break; 7786 case T_FLOAT: 7787 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7788 break; 7789 case T_LONG: 7790 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7791 break; 7792 case T_DOUBLE: 7793 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7794 break; 7795 default: 7796 ShouldNotReachHere(); 7797 } 7798 %} 7799 ins_pipe( pipe_slow ); 7800 %} 7801 7802 instruct vcastLtoBS(vec dst, vec src) %{ 7803 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7804 UseAVX <= 2); 7805 match(Set dst (VectorCastL2X src)); 7806 format %{ "vector_cast_l2x $dst,$src" %} 7807 ins_encode %{ 7808 assert(UseAVX > 0, "required"); 7809 7810 int vlen = Matcher::vector_length_in_bytes(this, $src); 7811 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7812 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7813 : ExternalAddress(vector_int_to_short_mask()); 7814 if (vlen <= 16) { 7815 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7816 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7817 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7818 } else { 7819 assert(vlen <= 32, "required"); 7820 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7821 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7822 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7823 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7824 } 7825 if (to_elem_bt == T_BYTE) { 7826 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7827 } 7828 %} 7829 ins_pipe( pipe_slow ); 7830 %} 7831 7832 instruct vcastLtoX_evex(vec dst, vec src) %{ 7833 predicate(UseAVX > 2 || 7834 (Matcher::vector_element_basic_type(n) == T_INT || 7835 Matcher::vector_element_basic_type(n) == T_FLOAT || 7836 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7837 match(Set dst (VectorCastL2X src)); 7838 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7839 ins_encode %{ 7840 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7841 int vlen = Matcher::vector_length_in_bytes(this, $src); 7842 int vlen_enc = vector_length_encoding(this, $src); 7843 switch (to_elem_bt) { 7844 case T_BYTE: 7845 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7846 vlen_enc = Assembler::AVX_512bit; 7847 } 7848 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7849 break; 7850 case T_SHORT: 7851 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7852 vlen_enc = Assembler::AVX_512bit; 7853 } 7854 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7855 break; 7856 case T_INT: 7857 if (vlen == 8) { 7858 if ($dst$$XMMRegister != $src$$XMMRegister) { 7859 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7860 } 7861 } else if (vlen == 16) { 7862 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7863 } else if (vlen == 32) { 7864 if (UseAVX > 2) { 7865 if (!VM_Version::supports_avx512vl()) { 7866 vlen_enc = Assembler::AVX_512bit; 7867 } 7868 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7869 } else { 7870 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7871 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7872 } 7873 } else { // vlen == 64 7874 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7875 } 7876 break; 7877 case T_FLOAT: 7878 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7879 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7880 break; 7881 case T_DOUBLE: 7882 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7883 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7884 break; 7885 7886 default: assert(false, "%s", type2name(to_elem_bt)); 7887 } 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 instruct vcastFtoD_reg(vec dst, vec src) %{ 7893 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7894 match(Set dst (VectorCastF2X src)); 7895 format %{ 
"vector_cast_f2d $dst,$src\t!" %} 7896 ins_encode %{ 7897 int vlen_enc = vector_length_encoding(this); 7898 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 7904 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7905 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7906 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7907 match(Set dst (VectorCastF2X src)); 7908 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7909 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7910 ins_encode %{ 7911 int vlen_enc = vector_length_encoding(this, $src); 7912 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7913 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7914 // 32 bit addresses for register indirect addressing mode since stub constants 7915 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7916 // However, targets are free to increase this limit, but having a large code cache size 7917 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7918 // cap we save a temporary register allocation which in limiting case can prevent 7919 // spilling in high register pressure blocks. 
7920 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7921 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7922 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7923 %} 7924 ins_pipe( pipe_slow ); 7925 %} 7926 7927 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7928 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7929 is_integral_type(Matcher::vector_element_basic_type(n))); 7930 match(Set dst (VectorCastF2X src)); 7931 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7932 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7933 ins_encode %{ 7934 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7935 if (to_elem_bt == T_LONG) { 7936 int vlen_enc = vector_length_encoding(this); 7937 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7938 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7939 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7940 } else { 7941 int vlen_enc = vector_length_encoding(this, $src); 7942 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7943 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7944 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7945 } 7946 %} 7947 ins_pipe( pipe_slow ); 7948 %} 7949 7950 instruct vcastDtoF_reg(vec dst, vec src) %{ 7951 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7952 match(Set dst (VectorCastD2X src)); 7953 format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7954 ins_encode %{ 7955 int vlen_enc = vector_length_encoding(this, $src); 7956 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7957 %} 7958 ins_pipe( pipe_slow ); 7959 %} 7960 7961 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7962 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7963 is_integral_type(Matcher::vector_element_basic_type(n))); 7964 match(Set dst (VectorCastD2X src)); 7965 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7966 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7967 ins_encode %{ 7968 int vlen_enc = vector_length_encoding(this, $src); 7969 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7970 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7971 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7972 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7973 %} 7974 ins_pipe( pipe_slow ); 7975 %} 7976 7977 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7978 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7979 is_integral_type(Matcher::vector_element_basic_type(n))); 7980 match(Set dst (VectorCastD2X src)); 7981 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7982 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7983 ins_encode %{ 7984 int vlen_enc = vector_length_encoding(this, $src); 7985 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7986 AddressLiteral signflip = VM_Version::supports_avx512dq() ? 
ExternalAddress(vector_double_signflip()) : 7987 ExternalAddress(vector_float_signflip()); 7988 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7989 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7990 %} 7991 ins_pipe( pipe_slow ); 7992 %} 7993 7994 instruct vucast(vec dst, vec src) %{ 7995 match(Set dst (VectorUCastB2X src)); 7996 match(Set dst (VectorUCastS2X src)); 7997 match(Set dst (VectorUCastI2X src)); 7998 format %{ "vector_ucast $dst,$src\t!" %} 7999 ins_encode %{ 8000 assert(UseAVX > 0, "required"); 8001 8002 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 8003 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 8004 int vlen_enc = vector_length_encoding(this); 8005 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 8006 %} 8007 ins_pipe( pipe_slow ); 8008 %} 8009 8010 #ifdef _LP64 8011 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 8012 predicate(!VM_Version::supports_avx512vl() && 8013 Matcher::vector_length_in_bytes(n) < 64 && 8014 Matcher::vector_element_basic_type(n) == T_INT); 8015 match(Set dst (RoundVF src)); 8016 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 8017 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 8018 ins_encode %{ 8019 int vlen_enc = vector_length_encoding(this); 8020 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 8021 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 8022 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 8023 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 8024 %} 8025 ins_pipe( pipe_slow ); 8026 %} 8027 8028 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8029 predicate((VM_Version::supports_avx512vl() || 8030 Matcher::vector_length_in_bytes(n) == 64) && 8031 Matcher::vector_element_basic_type(n) == T_INT); 8032 match(Set dst (RoundVF src)); 8033 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 8034 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 8035 ins_encode %{ 8036 int vlen_enc = vector_length_encoding(this); 8037 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 8038 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 8039 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 8040 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 8041 %} 8042 ins_pipe( pipe_slow ); 8043 %} 8044 8045 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8046 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 8047 match(Set dst (RoundVD src)); 8048 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 8049 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 8050 ins_encode %{ 8051 int vlen_enc = vector_length_encoding(this); 8052 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 8053 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 8054 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 8055 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 8056 %} 8057 ins_pipe( pipe_slow ); 8058 %} 8059 8060 #endif // _LP64 8061 8062 // --------------------------------- VectorMaskCmp -------------------------------------- 8063 8064 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8065 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8066 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 8067 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8068 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8069 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8070 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 8071 ins_encode %{ 8072 int vlen_enc = vector_length_encoding(this, $src1); 8073 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8074 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8075 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8076 } else { 8077 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8078 } 8079 %} 8080 ins_pipe( pipe_slow ); 8081 %} 8082 8083 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8084 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 8085 n->bottom_type()->isa_vectmask() == nullptr && 8086 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8087 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8088 effect(TEMP ktmp); 8089 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8090 ins_encode %{ 8091 int vlen_enc = Assembler::AVX_512bit; 8092 
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8093 KRegister mask = k0; // The comparison itself is not being masked. 8094 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8095 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8096 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8097 } else { 8098 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8099 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8100 } 8101 %} 8102 ins_pipe( pipe_slow ); 8103 %} 8104 8105 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 8106 predicate(n->bottom_type()->isa_vectmask() && 8107 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8108 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8109 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 8110 ins_encode %{ 8111 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8112 int vlen_enc = vector_length_encoding(this, $src1); 8113 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8114 KRegister mask = k0; // The comparison itself is not being masked. 
8115 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8116 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8117 } else { 8118 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8119 } 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8125 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8126 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8127 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8128 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8129 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8130 (n->in(2)->get_int() == BoolTest::eq || 8131 n->in(2)->get_int() == BoolTest::lt || 8132 n->in(2)->get_int() == BoolTest::gt)); // cond 8133 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8134 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 8135 ins_encode %{ 8136 int vlen_enc = vector_length_encoding(this, $src1); 8137 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8138 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8139 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 8140 %} 8141 ins_pipe( pipe_slow ); 8142 %} 8143 8144 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8145 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8146 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8147 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8148 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8149 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8150 (n->in(2)->get_int() == BoolTest::ne || 8151 n->in(2)->get_int() == BoolTest::le || 8152 n->in(2)->get_int() == BoolTest::ge)); // cond 8153 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8154 effect(TEMP dst, TEMP xtmp); 8155 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 8156 ins_encode %{ 8157 int vlen_enc = vector_length_encoding(this, $src1); 8158 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8159 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8160 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8161 %} 8162 ins_pipe( pipe_slow ); 8163 %} 8164 8165 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 8166 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8167 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8168 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8169 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8170 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8171 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8172 effect(TEMP dst, TEMP xtmp); 8173 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 8174 ins_encode %{ 8175 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 8176 int vlen_enc = vector_length_encoding(this, $src1); 8177 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8178 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 8179 8180 if (vlen_enc == Assembler::AVX_128bit) { 8181 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8182 } else { 8183 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 8184 } 8185 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8186 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8187 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 8188 %} 8189 ins_pipe( pipe_slow ); 8190 %} 8191 8192 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8193 predicate((n->bottom_type()->isa_vectmask() == nullptr && 8194 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 8195 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8196 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8197 effect(TEMP ktmp); 8198 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8199 ins_encode %{ 8200 assert(UseAVX > 2, "required"); 8201 8202 int vlen_enc = vector_length_encoding(this, $src1); 8203 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8204 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8205 KRegister mask = k0; // The comparison itself is not being masked. 
8206 bool merge = false; 8207 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8208 8209 switch (src1_elem_bt) { 8210 case T_INT: { 8211 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8212 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8213 break; 8214 } 8215 case T_LONG: { 8216 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8217 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 8218 break; 8219 } 8220 default: assert(false, "%s", type2name(src1_elem_bt)); 8221 } 8222 %} 8223 ins_pipe( pipe_slow ); 8224 %} 8225 8226 8227 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 8228 predicate(n->bottom_type()->isa_vectmask() && 8229 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 8230 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8231 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%} 8232 ins_encode %{ 8233 assert(UseAVX > 2, "required"); 8234 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8235 8236 int vlen_enc = vector_length_encoding(this, $src1); 8237 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8238 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8239 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8240 8241 // Comparison i 8242 switch (src1_elem_bt) { 8243 case T_BYTE: { 8244 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8245 break; 8246 } 8247 case T_SHORT: { 8248 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8249 break; 8250 } 8251 case T_INT: { 8252 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8253 break; 8254 } 8255 case T_LONG: { 8256 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8257 break; 8258 } 8259 default: assert(false, "%s", type2name(src1_elem_bt)); 8260 } 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 // Extract 8266 8267 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8268 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8269 match(Set dst (ExtractI src idx)); 8270 match(Set dst (ExtractS src idx)); 8271 #ifdef _LP64 8272 match(Set dst (ExtractB src idx)); 8273 #endif 8274 format %{ "extractI $dst,$src,$idx\t!" 
%} 8275 ins_encode %{ 8276 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8277 8278 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8279 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8280 %} 8281 ins_pipe( pipe_slow ); 8282 %} 8283 8284 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8285 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8286 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8287 match(Set dst (ExtractI src idx)); 8288 match(Set dst (ExtractS src idx)); 8289 #ifdef _LP64 8290 match(Set dst (ExtractB src idx)); 8291 #endif 8292 effect(TEMP vtmp); 8293 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8294 ins_encode %{ 8295 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8296 8297 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8298 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8299 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8300 %} 8301 ins_pipe( pipe_slow ); 8302 %} 8303 8304 #ifdef _LP64 8305 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8306 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8307 match(Set dst (ExtractL src idx)); 8308 format %{ "extractL $dst,$src,$idx\t!" %} 8309 ins_encode %{ 8310 assert(UseSSE >= 4, "required"); 8311 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8312 8313 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8314 %} 8315 ins_pipe( pipe_slow ); 8316 %} 8317 8318 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8319 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8320 Matcher::vector_length(n->in(1)) == 8); // src 8321 match(Set dst (ExtractL src idx)); 8322 effect(TEMP vtmp); 8323 format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8324 ins_encode %{ 8325 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8326 8327 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8328 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8329 %} 8330 ins_pipe( pipe_slow ); 8331 %} 8332 #endif 8333 8334 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8335 predicate(Matcher::vector_length(n->in(1)) <= 4); 8336 match(Set dst (ExtractF src idx)); 8337 effect(TEMP dst, TEMP vtmp); 8338 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8339 ins_encode %{ 8340 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8341 8342 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8343 %} 8344 ins_pipe( pipe_slow ); 8345 %} 8346 8347 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8348 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8349 Matcher::vector_length(n->in(1)/*src*/) == 16); 8350 match(Set dst (ExtractF src idx)); 8351 effect(TEMP vtmp); 8352 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8353 ins_encode %{ 8354 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8355 8356 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8357 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8358 %} 8359 ins_pipe( pipe_slow ); 8360 %} 8361 8362 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8363 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8364 match(Set dst (ExtractD src idx)); 8365 format %{ "extractD $dst,$src,$idx\t!" 
%} 8366 ins_encode %{ 8367 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8368 8369 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8370 %} 8371 ins_pipe( pipe_slow ); 8372 %} 8373 8374 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8375 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8376 Matcher::vector_length(n->in(1)) == 8); // src 8377 match(Set dst (ExtractD src idx)); 8378 effect(TEMP vtmp); 8379 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8380 ins_encode %{ 8381 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8382 8383 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8384 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8385 %} 8386 ins_pipe( pipe_slow ); 8387 %} 8388 8389 // --------------------------------- Vector Blend -------------------------------------- 8390 8391 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8392 predicate(UseAVX == 0); 8393 match(Set dst (VectorBlend (Binary dst src) mask)); 8394 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 8395 effect(TEMP tmp); 8396 ins_encode %{ 8397 assert(UseSSE >= 4, "required"); 8398 8399 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8400 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8401 } 8402 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8403 %} 8404 ins_pipe( pipe_slow ); 8405 %} 8406 8407 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8408 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8409 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8410 Matcher::vector_length_in_bytes(n) <= 32 && 8411 is_integral_type(Matcher::vector_element_basic_type(n))); 8412 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8413 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" 
%} 8414 ins_encode %{ 8415 int vlen_enc = vector_length_encoding(this); 8416 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8417 %} 8418 ins_pipe( pipe_slow ); 8419 %} 8420 8421 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8422 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8423 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8424 Matcher::vector_length_in_bytes(n) <= 32 && 8425 !is_integral_type(Matcher::vector_element_basic_type(n))); 8426 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8427 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8428 ins_encode %{ 8429 int vlen_enc = vector_length_encoding(this); 8430 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8431 %} 8432 ins_pipe( pipe_slow ); 8433 %} 8434 8435 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8436 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8437 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8438 Matcher::vector_length_in_bytes(n) <= 32); 8439 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8440 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8441 effect(TEMP vtmp, TEMP dst); 8442 ins_encode %{ 8443 int vlen_enc = vector_length_encoding(this); 8444 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8445 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8446 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8452 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8453 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8454 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8455 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8456 effect(TEMP ktmp); 8457 ins_encode %{ 8458 int vlen_enc = Assembler::AVX_512bit; 8459 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8460 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8461 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8462 %} 8463 ins_pipe( pipe_slow ); 8464 %} 8465 8466 8467 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8468 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8469 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8470 VM_Version::supports_avx512bw())); 8471 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8472 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8473 ins_encode %{ 8474 int vlen_enc = vector_length_encoding(this); 8475 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8476 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8477 %} 8478 ins_pipe( pipe_slow ); 8479 %} 8480 8481 // --------------------------------- ABS -------------------------------------- 8482 // a = |a| 8483 instruct vabsB_reg(vec dst, vec src) %{ 8484 match(Set dst (AbsVB src)); 8485 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8486 ins_encode %{ 8487 uint vlen = Matcher::vector_length(this); 8488 if (vlen <= 16) { 8489 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8490 } else { 8491 int vlen_enc = vector_length_encoding(this); 8492 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8493 } 8494 %} 8495 ins_pipe( pipe_slow ); 8496 %} 8497 8498 instruct vabsS_reg(vec dst, vec src) %{ 8499 match(Set dst (AbsVS src)); 8500 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8501 ins_encode %{ 8502 uint vlen = Matcher::vector_length(this); 8503 if (vlen <= 8) { 8504 __ pabsw($dst$$XMMRegister, 
$src$$XMMRegister); 8505 } else { 8506 int vlen_enc = vector_length_encoding(this); 8507 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8508 } 8509 %} 8510 ins_pipe( pipe_slow ); 8511 %} 8512 8513 instruct vabsI_reg(vec dst, vec src) %{ 8514 match(Set dst (AbsVI src)); 8515 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8516 ins_encode %{ 8517 uint vlen = Matcher::vector_length(this); 8518 if (vlen <= 4) { 8519 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8520 } else { 8521 int vlen_enc = vector_length_encoding(this); 8522 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8523 } 8524 %} 8525 ins_pipe( pipe_slow ); 8526 %} 8527 8528 instruct vabsL_reg(vec dst, vec src) %{ 8529 match(Set dst (AbsVL src)); 8530 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8531 ins_encode %{ 8532 assert(UseAVX > 2, "required"); 8533 int vlen_enc = vector_length_encoding(this); 8534 if (!VM_Version::supports_avx512vl()) { 8535 vlen_enc = Assembler::AVX_512bit; 8536 } 8537 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8538 %} 8539 ins_pipe( pipe_slow ); 8540 %} 8541 8542 // --------------------------------- ABSNEG -------------------------------------- 8543 8544 instruct vabsnegF(vec dst, vec src) %{ 8545 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8546 match(Set dst (AbsVF src)); 8547 match(Set dst (NegVF src)); 8548 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8549 ins_cost(150); 8550 ins_encode %{ 8551 int opcode = this->ideal_Opcode(); 8552 int vlen = Matcher::vector_length(this); 8553 if (vlen == 2) { 8554 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8555 } else { 8556 assert(vlen == 8 || vlen == 16, "required"); 8557 int vlen_enc = vector_length_encoding(this); 8558 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8559 } 8560 %} 8561 ins_pipe( pipe_slow ); 8562 %} 8563 8564 instruct vabsneg4F(vec dst) %{ 8565 
predicate(Matcher::vector_length(n) == 4); 8566 match(Set dst (AbsVF dst)); 8567 match(Set dst (NegVF dst)); 8568 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8569 ins_cost(150); 8570 ins_encode %{ 8571 int opcode = this->ideal_Opcode(); 8572 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8573 %} 8574 ins_pipe( pipe_slow ); 8575 %} 8576 8577 instruct vabsnegD(vec dst, vec src) %{ 8578 match(Set dst (AbsVD src)); 8579 match(Set dst (NegVD src)); 8580 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8581 ins_encode %{ 8582 int opcode = this->ideal_Opcode(); 8583 uint vlen = Matcher::vector_length(this); 8584 if (vlen == 2) { 8585 assert(UseSSE >= 2, "required"); 8586 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8587 } else { 8588 int vlen_enc = vector_length_encoding(this); 8589 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8590 } 8591 %} 8592 ins_pipe( pipe_slow ); 8593 %} 8594 8595 //------------------------------------- VectorTest -------------------------------------------- 8596 8597 #ifdef _LP64 8598 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8599 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8600 match(Set cr (VectorTest src1 src2)); 8601 effect(TEMP vtmp); 8602 format %{ "vptest_lt16 $src1, $src2\t! 
using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Flag-setting vector test for vectors of >= 16 bytes; wide enough that no
// XMM temporary is needed (xnoreg is passed to the macro assembler).
instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// All-true test on an opmask register when fewer than 8 mask lanes are live
// (or exactly 8 without AVX512DQ).  Materializes the mask in a GPR, keeps only
// the live lane bits, and compares against the all-ones pattern so that the
// flags encode BoolTest::overflow.
instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    // Mask off lanes beyond the vector length, then compare against all-ones.
    __ andl($tmp$$Register, (1 << masklen) - 1);
    __ cmpl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

// Any-true test on an opmask register for the same small-mask configurations
// as above; the andl sets ZF iff no live lane bit is set (BoolTest::ne).
instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{
  predicate((Matcher::vector_length(n->in(1)) < 8 ||
             (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) &&
            static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP tmp);
  format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kmovwl($tmp$$Register, $src1$$KRegister);
    __ andl($tmp$$Register, (1 << masklen) - 1);
  %}
  ins_pipe( pipe_slow );
%}

// Opmask test for >= 16 lanes, or exactly 8 lanes with AVX512DQ, using a
// single kortest (sets flags directly, no GPR temp required).
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

//------------------------------------- LoadMask --------------------------------------------

// VectorLoadMask producing a byte-vector mask (non-opmask path) when
// AVX512VL+BW are unavailable.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}

// VectorLoadMask into an opmask register (kReg) on targets without
// AVX512VL+BW; always operates at 512-bit width.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

// VectorLoadMask into an opmask register on AVX512VL+BW targets; uses the
// natural encoding width of the input vector.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- StoreMask --------------------------------------------

// Store a byte-element mask: the mask lanes are already byte-sized, so a
// (v)pabsb normalizes 0/-1 lanes to 0/1.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Store a short-element mask: narrow 16-bit lanes down to bytes, then
// normalize to 0/1.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Fold the upper 128-bit lane into the lower one before packing.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Store an int/float-element mask on pre-AVX512 targets: two packing steps
// (dword->word->byte) followed by 0/1 normalization.
instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  effect(TEMP_DEF dst, TEMP xtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      assert(UseSSE >= 3, "required");
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
      __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Store a long/double-element mask (2 lanes) on pre-AVX512 targets.
instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(UseSSE >= 3, "required");
    __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
    // Compress the two qword lanes into adjacent dwords, then pack to bytes.
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
    __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
    __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Store a long/double-element mask (4 lanes, 256-bit source) on AVX1/2:
// gather the even dwords from both lanes, then pack and normalize to 0/1.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 (non-opmask input) store of an int-element mask via evpmovdb.
// Without AVX512VL the narrowing must be encoded at 512-bit width.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX512 (non-opmask input) store of a long-element mask via evpmovqb.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Store from an opmask (kReg) source on targets lacking AVX512VL+BW:
// expand the mask to a 0/1 int vector via a masked load of constant bits,
// then narrow dwords to bytes.
instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

// Store from an opmask source with AVX512VL+BW: evpmovm2b expands the mask
// to 0/-1 bytes, vpabsb normalizes to 0/1.
instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    int dst_vlen_enc = vector_length_encoding(this);
    __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Mask casts below are no-ops: the representation does not change, so no
// code is emitted (ins_cost(0), empty encoding).
instruct vmaskcast_evex(kReg dst) %{
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

instruct vmaskcast(vec dst) %{
  predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast dst));
  ins_cost(0);
  format %{ "vector_mask_cast $dst" %}
  ins_encode %{
    // empty
  %}
  ins_pipe(empty);
%}

// Mask cast that changes the vector size: delegates lane-width conversion to
// the macro assembler.
instruct vmaskcast_avx(vec dst, vec src) %{
  predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1)));
  match(Set dst (VectorMaskCast src));
  format %{ "vector_mask_cast $dst, $src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    BasicType src_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType dst_bt = Matcher::vector_element_basic_type(this);
    __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen);
  %}
  ins_pipe(pipe_slow);
%}

//-------------------------------- Load Iota Indices ----------------------------------

// Load the constant iota (0,1,2,...) index vector from constant memory.
instruct loadIotaIndices(vec dst, immI_0 src) %{
  match(Set dst (VectorLoadConst src));
  format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// PopulateIndex (int start): broadcast the start value and add the iota
// vector.  Only stride 1 is supported (asserted below).
instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// PopulateIndex (long start): same scheme as the int variant above.
instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{
  match(Set dst (PopulateIndex src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_populate_index $dst $src1 $src2\t!
using $vtmp as TEMP" %}
  ins_encode %{
    assert($src2$$constant == 1, "required");
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc);
    __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt);
    __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif
//-------------------------------- Rearrange ----------------------------------

// LoadShuffle/Rearrange for Byte

// In-place byte rearrange for vectors below 32 lanes via pshufb.
instruct rearrangeB(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) < 32);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 32-lane byte rearrange without AVX512_VBMI: vpshufb only shuffles within
// 128-bit lanes, so shuffle both the original and the lane-swapped source and
// blend on the cross-lane bit.
instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}


// Byte rearrange for > 32 lanes on AVX512 without VBMI; delegated to the
// macro assembler's multi-step byte rearrange.
instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Byte rearrange with AVX512_VBMI: single cross-lane vpermb.
instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

// Convert a short shuffle mask into a byte shuffle mask, since only byte
// shuffles (pshufb) are available without AVX512BW.
instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            !VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from short shuffle mask
    // only byte shuffle instruction available on these platforms
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

// In-place short rearrange (<= 8 lanes) using the byte-expanded shuffle
// produced by loadShuffleS above.
instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// 16-lane short rearrange without AVX512BW: same dual-shuffle-and-blend
// scheme as rearrangeB_avx (vpshufb cannot cross 128-bit lanes).
instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

// Short rearrange with AVX512BW: single cross-lane vpermw.  Without VL the
// instruction must be encoded at 512-bit width.
instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

// Convert an int shuffle mask into a byte shuffle mask for pure-SSE targets
// (UseAVX == 0), where only byte shuffles are available.
instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t!
using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from int shuffle mask
    // only byte shuffle instruction available on these platforms

    // Duplicate and multiply each shuffle by 4
    __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// In-place int/float rearrange on pure-SSE targets using the byte-expanded
// shuffle produced by loadShuffleI above.
instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Int/float rearrange on AVX targets; delegated to the macro assembler.
instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

// Convert a long shuffle mask into a double-word shuffle mask, since only
// double-word cross-lane shuffles (vpermd) are available without AVX512VL.
instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from long shuffle mask
    // only double word shuffle instruction available on these platforms

    // Multiply each shuffle by two to get double word index
    __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

// Long/double rearrange without AVX512VL, using the dword-expanded shuffle
// produced by loadShuffleL above with vpermd.
instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long/double rearrange via vpermq (AVX512, or 8 lanes).  vpermq with a
// vector index has no 128-bit form, so promote to 256-bit encoding.
instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

// Packed-float fused multiply-add, register operands.
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-float fused multiply-add with a memory operand for b.
instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-double fused multiply-add, register operands.
instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Packed-double fused multiply-add with a memory operand for b.
instruct vfmaD_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

// Multiply adjacent shorts and add pairs into ints (pmaddwd), SSE form.
instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Multiply adjacent shorts and add pairs into ints (vpmaddwd), AVX form.
instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

// Fused multiply-add-accumulate (VNNI evpdpwssd): matches the
// AddVI(MulAddVS2VI, dst) pattern into a single instruction.
instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t!
muladdadd packedStoI" %} 9304 ins_encode %{ 9305 assert(UseAVX > 2, "required"); 9306 int vlen_enc = vector_length_encoding(this); 9307 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9308 %} 9309 ins_pipe( pipe_slow ); 9310 ins_cost(10); 9311 %} 9312 9313 // --------------------------------- PopCount -------------------------------------- 9314 9315 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9316 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9317 match(Set dst (PopCountVI src)); 9318 match(Set dst (PopCountVL src)); 9319 format %{ "vector_popcount_integral $dst, $src" %} 9320 ins_encode %{ 9321 int opcode = this->ideal_Opcode(); 9322 int vlen_enc = vector_length_encoding(this, $src); 9323 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9324 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9325 %} 9326 ins_pipe( pipe_slow ); 9327 %} 9328 9329 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9330 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9331 match(Set dst (PopCountVI src mask)); 9332 match(Set dst (PopCountVL src mask)); 9333 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9334 ins_encode %{ 9335 int vlen_enc = vector_length_encoding(this, $src); 9336 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9337 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9338 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9339 %} 9340 ins_pipe( pipe_slow ); 9341 %} 9342 9343 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9344 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9345 match(Set dst (PopCountVI src)); 9346 match(Set dst (PopCountVL src)); 9347 effect(TEMP dst, TEMP xtmp1, TEMP 
xtmp2, TEMP rtmp); 9348 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9349 ins_encode %{ 9350 int opcode = this->ideal_Opcode(); 9351 int vlen_enc = vector_length_encoding(this, $src); 9352 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9353 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9354 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9355 %} 9356 ins_pipe( pipe_slow ); 9357 %} 9358 9359 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9360 9361 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9362 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9363 Matcher::vector_length_in_bytes(n->in(1)))); 9364 match(Set dst (CountTrailingZerosV src)); 9365 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9366 ins_cost(400); 9367 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9368 ins_encode %{ 9369 int vlen_enc = vector_length_encoding(this, $src); 9370 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9371 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9372 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9373 %} 9374 ins_pipe( pipe_slow ); 9375 %} 9376 9377 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9378 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9379 VM_Version::supports_avx512cd() && 9380 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9381 match(Set dst (CountTrailingZerosV src)); 9382 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9383 ins_cost(400); 9384 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9385 ins_encode %{ 9386 int 
vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// T_BYTE trailing-zero count: needs AVX512VL+BW; uses four vector TEMPs, a mask TEMP and a GP scratch.
instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX (pre-AVX512VL) trailing-zero count for vectors smaller than 64 bytes.
instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// --------------------------------- Bitwise Ternary Logic ----------------------------------

// Three-input boolean function selected by the immediate $func (vpternlogd truth-table byte).
instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Memory-operand form of the ternary logic op; only for vectors larger than 8 bytes.
instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Rotation Operations ----------------------------------
// Rotate left/right by an immediate shift count.
instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Rotate left/right by a per-element variable shift vector.
instruct vprorate(vec dst, vec src, vec shift) %{
  match(Set dst (RotateLeftV src shift));
  match(Set dst (RotateRightV src shift));
  format %{ "vprotate $dst,$src,$shift\t! vector rotate" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType etype = this->bottom_type()->is_vect()->element_basic_type();
    __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Masked Operations ------------------------------------
// AVX masked load where the mask is a boolean vector (not an opmask), non-subword elements.
instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{
  predicate(!n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(this);
    __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// EVEX masked load: mask lives in an opmask (k) register.
instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{
  predicate(n->in(3)->bottom_type()->isa_vectmask());
  match(Set dst (LoadVectorMasked mem mask));
  format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
  ins_encode %{
    BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
    int vector_len = vector_length_encoding(this);
    __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// AVX masked store; element type and length are taken from the stored src node.
instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{
  predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    int vlen_enc = vector_length_encoding(src_node);
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// EVEX masked store using an opmask register.
instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{
  predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask());
  match(Set mem (StoreVectorMasked mem (Binary src mask)));
  format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
  ins_encode %{
    const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
    BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
    int vlen_enc = vector_length_encoding(src_node);
    __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
// Runtime check that a vector memory address has the bits selected by $mask clear;
// stops the VM with a diagnostic message otherwise.
instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{
  match(Set addr (VerifyVectorAlignment addr mask));
  effect(KILL cr);
  format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %}
  ins_encode %{
    Label Lskip;
    // check if masked bits of addr are zero
    __ testq($addr$$Register, $mask$$constant);
    __ jccb(Assembler::equal, Lskip);
    __ stop("verify_vector_alignment found a misaligned vector memory access");
    __ bind(Lskip);
  %}
  ins_pipe(pipe_slow);
%}

// Masked vector compare: yields -1 when all masked lanes compare equal,
// otherwise the bit index of the first mismatching lane (via not + tzcnt).
instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  match(Set dst (VectorCmpMasked src1 (Binary src2 mask)));
  effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %}
  ins_encode %{
    assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch");
    assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch");

    Label DONE;
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1);

    __ knotql($ktmp2$$KRegister, $mask$$KRegister);
    __ mov64($dst$$Register, -1L);
    __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc);
    __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister);
    __ jccb(Assembler::carrySet, DONE);
    __ kmovql($dst$$Register, $ktmp1$$KRegister);
    __ notq($dst$$Register);
    __ tzcntq($dst$$Register, $dst$$Register);
    __ bind(DONE);
  %}
  ins_pipe( pipe_slow );
%}


// Build an opmask with the low $len bits set, length supplied in a register.
instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{
  match(Set dst (VectorMaskGen len));
  effect(TEMP temp, KILL cr);
  format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
  ins_encode %{
    __ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Build an opmask with the low $len bits set, length supplied as an immediate.
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
  match(Set dst (VectorMaskGen len));
  format %{ "vector_mask_gen $len \t! vector mask generator" %}
  effect(TEMP temp);
  ins_encode %{
    __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
    __ kmovql($dst$$KRegister, $temp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskToLong when the mask is already in an opmask register.
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskToLong when the mask is a boolean vector.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// VectorMaskToLong fused with the VectorStoreMask feeding it.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Count of set mask lanes, opmask-register form.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Count of set mask lanes, boolean-vector form.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Count of set mask lanes fused with the VectorStoreMask feeding it.
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Index of first/last set mask lane, opmask-register form.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Index of first/last set mask lane, boolean-vector form.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Index of first/last set mask lane fused with the VectorStoreMask feeding it.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------
#ifdef _LP64
// AVX2 fallback for vector compress/expand on vectors up to 32 bytes.
instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr);
  format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %}
  ins_encode
%{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register,
                                   $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// EVEX compress/expand; requires AVX512VL or a full 512-bit vector.
instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Compress the set bits of an opmask register into the low positions.
instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

// Bit reversal without GFNI support.
instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Bit reversal via GFNI, using the 0x8040201008040201 affine-transform constant.
instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(jlong(0x8040201008040201));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// Byte reversal; AVX512BW or vectors smaller than 64 bytes.
instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Byte reversal for 64-byte vectors when AVX512BW is not available.
instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

// EVEX leading-zero count for non-subword element types; no TEMPs needed.
instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Predicated (masked) form of the EVEX non-subword leading-zero count.
instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    // copy src into dst before the masked count is applied
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// T_SHORT leading-zero count: requires AVX512CD plus either AVX512VL or a full 512-bit vector.
instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// T_BYTE leading-zero count: needs AVX512VL+BW; uses three vector TEMPs, a mask TEMP and a GP scratch.
instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister,
$src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX T_INT leading-zero count; no GP scratch register needed.
instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// AVX leading-zero count for non-int element types; needs a GP scratch register.
instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------

// Masked add, register-register form (merge semantics: last arg to evmasked_op is true).
instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked add with a memory operand.
instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked xor, register form.
instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked xor with a memory operand.
instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked or, register form.
instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked or with a memory operand.
instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked and, register form.
instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked and with a memory operand.
instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subtract, register form.
instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked subtract with a memory operand.
instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked multiply, register form (no byte variant in the match list).
instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked multiply with a memory operand.
instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked square root (unary; operates on dst in place).
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked divide, register form (float/double only).
instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked divide with a memory operand.
instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}


// Masked rotate by an immediate shift count.
instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked rotate by a vector shift count.
instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Masked left shift by an immediate count (short/int/long element types).
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t!
lshift masked operation" %} 10201 ins_encode %{ 10202 int vlen_enc = vector_length_encoding(this); 10203 BasicType bt = Matcher::vector_element_basic_type(this); 10204 int opc = this->ideal_Opcode(); 10205 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10206 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10207 %} 10208 ins_pipe( pipe_slow ); 10209 %} 10210 10211 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10212 predicate(!n->as_ShiftV()->is_var_shift()); 10213 match(Set dst (LShiftVS (Binary dst src2) mask)); 10214 match(Set dst (LShiftVI (Binary dst src2) mask)); 10215 match(Set dst (LShiftVL (Binary dst src2) mask)); 10216 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 10217 ins_encode %{ 10218 int vlen_enc = vector_length_encoding(this); 10219 BasicType bt = Matcher::vector_element_basic_type(this); 10220 int opc = this->ideal_Opcode(); 10221 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10222 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10223 %} 10224 ins_pipe( pipe_slow ); 10225 %} 10226 10227 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10228 predicate(n->as_ShiftV()->is_var_shift()); 10229 match(Set dst (LShiftVS (Binary dst src2) mask)); 10230 match(Set dst (LShiftVI (Binary dst src2) mask)); 10231 match(Set dst (LShiftVL (Binary dst src2) mask)); 10232 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! 
lshift masked operation" %} 10233 ins_encode %{ 10234 int vlen_enc = vector_length_encoding(this); 10235 BasicType bt = Matcher::vector_element_basic_type(this); 10236 int opc = this->ideal_Opcode(); 10237 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10238 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10239 %} 10240 ins_pipe( pipe_slow ); 10241 %} 10242 10243 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10244 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 10245 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 10246 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 10247 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 10248 ins_encode %{ 10249 int vlen_enc = vector_length_encoding(this); 10250 BasicType bt = Matcher::vector_element_basic_type(this); 10251 int opc = this->ideal_Opcode(); 10252 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10253 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10254 %} 10255 ins_pipe( pipe_slow ); 10256 %} 10257 10258 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10259 predicate(!n->as_ShiftV()->is_var_shift()); 10260 match(Set dst (RShiftVS (Binary dst src2) mask)); 10261 match(Set dst (RShiftVI (Binary dst src2) mask)); 10262 match(Set dst (RShiftVL (Binary dst src2) mask)); 10263 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 10264 ins_encode %{ 10265 int vlen_enc = vector_length_encoding(this); 10266 BasicType bt = Matcher::vector_element_basic_type(this); 10267 int opc = this->ideal_Opcode(); 10268 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10269 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10270 %} 10271 ins_pipe( pipe_slow ); 10272 %} 10273 10274 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10275 predicate(n->as_ShiftV()->is_var_shift()); 10276 match(Set dst (RShiftVS (Binary dst src2) mask)); 10277 match(Set dst (RShiftVI (Binary dst src2) mask)); 10278 match(Set dst (RShiftVL (Binary dst src2) mask)); 10279 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 10280 ins_encode %{ 10281 int vlen_enc = vector_length_encoding(this); 10282 BasicType bt = Matcher::vector_element_basic_type(this); 10283 int opc = this->ideal_Opcode(); 10284 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10285 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10286 %} 10287 ins_pipe( pipe_slow ); 10288 %} 10289 10290 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 10291 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 10292 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 10293 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 10294 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 10295 ins_encode %{ 10296 int vlen_enc = vector_length_encoding(this); 10297 BasicType bt = Matcher::vector_element_basic_type(this); 10298 int opc = this->ideal_Opcode(); 10299 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10300 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 10301 %} 10302 ins_pipe( pipe_slow ); 10303 %} 10304 10305 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 10306 predicate(!n->as_ShiftV()->is_var_shift()); 10307 match(Set dst (URShiftVS (Binary dst src2) mask)); 10308 match(Set dst (URShiftVI (Binary dst src2) mask)); 10309 match(Set dst (URShiftVL (Binary dst src2) mask)); 10310 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 10311 ins_encode %{ 10312 int vlen_enc = vector_length_encoding(this); 10313 BasicType bt = Matcher::vector_element_basic_type(this); 10314 int opc = this->ideal_Opcode(); 10315 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10316 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 10317 %} 10318 ins_pipe( pipe_slow ); 10319 %} 10320 10321 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 10322 predicate(n->as_ShiftV()->is_var_shift()); 10323 match(Set dst (URShiftVS (Binary dst src2) mask)); 10324 match(Set dst (URShiftVI (Binary dst src2) mask)); 10325 match(Set dst (URShiftVL (Binary dst src2) mask)); 10326 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! 
urshift masked operation" %} 10327 ins_encode %{ 10328 int vlen_enc = vector_length_encoding(this); 10329 BasicType bt = Matcher::vector_element_basic_type(this); 10330 int opc = this->ideal_Opcode(); 10331 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10332 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 10333 %} 10334 ins_pipe( pipe_slow ); 10335 %} 10336 10337 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 10338 match(Set dst (MaxV (Binary dst src2) mask)); 10339 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10340 ins_encode %{ 10341 int vlen_enc = vector_length_encoding(this); 10342 BasicType bt = Matcher::vector_element_basic_type(this); 10343 int opc = this->ideal_Opcode(); 10344 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10345 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10346 %} 10347 ins_pipe( pipe_slow ); 10348 %} 10349 10350 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 10351 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 10352 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 10353 ins_encode %{ 10354 int vlen_enc = vector_length_encoding(this); 10355 BasicType bt = Matcher::vector_element_basic_type(this); 10356 int opc = this->ideal_Opcode(); 10357 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10358 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10359 %} 10360 ins_pipe( pipe_slow ); 10361 %} 10362 10363 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 10364 match(Set dst (MinV (Binary dst src2) mask)); 10365 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! 
min masked operation" %} 10366 ins_encode %{ 10367 int vlen_enc = vector_length_encoding(this); 10368 BasicType bt = Matcher::vector_element_basic_type(this); 10369 int opc = this->ideal_Opcode(); 10370 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10371 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10372 %} 10373 ins_pipe( pipe_slow ); 10374 %} 10375 10376 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 10377 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 10378 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 10379 ins_encode %{ 10380 int vlen_enc = vector_length_encoding(this); 10381 BasicType bt = Matcher::vector_element_basic_type(this); 10382 int opc = this->ideal_Opcode(); 10383 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10384 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 10385 %} 10386 ins_pipe( pipe_slow ); 10387 %} 10388 10389 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 10390 match(Set dst (VectorRearrange (Binary dst src2) mask)); 10391 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 10392 ins_encode %{ 10393 int vlen_enc = vector_length_encoding(this); 10394 BasicType bt = Matcher::vector_element_basic_type(this); 10395 int opc = this->ideal_Opcode(); 10396 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10397 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10398 %} 10399 ins_pipe( pipe_slow ); 10400 %} 10401 10402 instruct vabs_masked(vec dst, kReg mask) %{ 10403 match(Set dst (AbsVB dst mask)); 10404 match(Set dst (AbsVS dst mask)); 10405 match(Set dst (AbsVI dst mask)); 10406 match(Set dst (AbsVL dst mask)); 10407 format %{ "vabs_masked $dst, $mask \t! 
vabs masked operation" %} 10408 ins_encode %{ 10409 int vlen_enc = vector_length_encoding(this); 10410 BasicType bt = Matcher::vector_element_basic_type(this); 10411 int opc = this->ideal_Opcode(); 10412 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10413 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 10414 %} 10415 ins_pipe( pipe_slow ); 10416 %} 10417 10418 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 10419 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 10420 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 10421 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 10422 ins_encode %{ 10423 assert(UseFMA, "Needs FMA instructions support."); 10424 int vlen_enc = vector_length_encoding(this); 10425 BasicType bt = Matcher::vector_element_basic_type(this); 10426 int opc = this->ideal_Opcode(); 10427 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10428 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 10429 %} 10430 ins_pipe( pipe_slow ); 10431 %} 10432 10433 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 10434 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 10435 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 10436 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 10437 ins_encode %{ 10438 assert(UseFMA, "Needs FMA instructions support."); 10439 int vlen_enc = vector_length_encoding(this); 10440 BasicType bt = Matcher::vector_element_basic_type(this); 10441 int opc = this->ideal_Opcode(); 10442 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 10443 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 10444 %} 10445 ins_pipe( pipe_slow ); 10446 %} 10447 10448 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10449 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10450 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10451 ins_encode %{ 10452 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10453 int vlen_enc = vector_length_encoding(this, $src1); 10454 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10455 10456 // Comparison i 10457 switch (src1_elem_bt) { 10458 case T_BYTE: { 10459 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10460 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10461 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10462 break; 10463 } 10464 case T_SHORT: { 10465 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10466 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10467 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10468 break; 10469 } 10470 case T_INT: { 10471 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 10472 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10473 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10474 break; 10475 } 10476 case T_LONG: { 10477 bool is_unsigned = 
Matcher::is_unsigned_booltest_pred($cond$$constant); 10478 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10479 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10480 break; 10481 } 10482 case T_FLOAT: { 10483 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10484 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10485 break; 10486 } 10487 case T_DOUBLE: { 10488 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10489 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10490 break; 10491 } 10492 default: assert(false, "%s", type2name(src1_elem_bt)); break; 10493 } 10494 %} 10495 ins_pipe( pipe_slow ); 10496 %} 10497 10498 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10499 predicate(Matcher::vector_length(n) <= 32); 10500 match(Set dst (MaskAll src)); 10501 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10502 ins_encode %{ 10503 int mask_len = Matcher::vector_length(this); 10504 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10505 %} 10506 ins_pipe( pipe_slow ); 10507 %} 10508 10509 #ifdef _LP64 10510 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10511 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10512 match(Set dst (XorVMask src (MaskAll cnt))); 10513 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10514 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10515 ins_encode %{ 10516 uint masklen = Matcher::vector_length(this); 10517 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10518 %} 10519 ins_pipe( pipe_slow ); 10520 %} 10521 10522 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10523 
predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10524 (Matcher::vector_length(n) == 16) || 10525 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10526 match(Set dst (XorVMask src (MaskAll cnt))); 10527 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10528 ins_encode %{ 10529 uint masklen = Matcher::vector_length(this); 10530 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10531 %} 10532 ins_pipe( pipe_slow ); 10533 %} 10534 10535 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10536 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10537 match(Set dst (VectorLongToMask src)); 10538 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10539 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10540 ins_encode %{ 10541 int mask_len = Matcher::vector_length(this); 10542 int vec_enc = vector_length_encoding(mask_len); 10543 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10544 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10545 %} 10546 ins_pipe( pipe_slow ); 10547 %} 10548 10549 10550 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10551 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10552 match(Set dst (VectorLongToMask src)); 10553 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10554 format %{ "long_to_mask_avx $dst, $src\t! 
using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10555 ins_encode %{ 10556 int mask_len = Matcher::vector_length(this); 10557 assert(mask_len <= 32, "invalid mask length"); 10558 int vec_enc = vector_length_encoding(mask_len); 10559 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10560 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10561 %} 10562 ins_pipe( pipe_slow ); 10563 %} 10564 10565 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10566 predicate(n->bottom_type()->isa_vectmask()); 10567 match(Set dst (VectorLongToMask src)); 10568 format %{ "long_to_mask_evex $dst, $src\t!" %} 10569 ins_encode %{ 10570 __ kmov($dst$$KRegister, $src$$Register); 10571 %} 10572 ins_pipe( pipe_slow ); 10573 %} 10574 #endif 10575 10576 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 10577 match(Set dst (AndVMask src1 src2)); 10578 match(Set dst (OrVMask src1 src2)); 10579 match(Set dst (XorVMask src1 src2)); 10580 effect(TEMP kscratch); 10581 format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %} 10582 ins_encode %{ 10583 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 10584 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 10585 assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal"); 10586 uint masklen = Matcher::vector_length(this); 10587 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 10588 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 10589 %} 10590 ins_pipe( pipe_slow ); 10591 %} 10592 10593 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 10594 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10595 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! 
vternlog masked operation" %} 10596 ins_encode %{ 10597 int vlen_enc = vector_length_encoding(this); 10598 BasicType bt = Matcher::vector_element_basic_type(this); 10599 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10600 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 10601 %} 10602 ins_pipe( pipe_slow ); 10603 %} 10604 10605 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 10606 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 10607 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 10608 ins_encode %{ 10609 int vlen_enc = vector_length_encoding(this); 10610 BasicType bt = Matcher::vector_element_basic_type(this); 10611 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 10612 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 10613 %} 10614 ins_pipe( pipe_slow ); 10615 %} 10616 10617 instruct castMM(kReg dst) 10618 %{ 10619 match(Set dst (CastVV dst)); 10620 10621 size(0); 10622 format %{ "# castVV of $dst" %} 10623 ins_encode(/* empty encoding */); 10624 ins_cost(0); 10625 ins_pipe(empty); 10626 %} 10627 10628 instruct castVV(vec dst) 10629 %{ 10630 match(Set dst (CastVV dst)); 10631 10632 size(0); 10633 format %{ "# castVV of $dst" %} 10634 ins_encode(/* empty encoding */); 10635 ins_cost(0); 10636 ins_pipe(empty); 10637 %} 10638 10639 instruct castVVLeg(legVec dst) 10640 %{ 10641 match(Set dst (CastVV dst)); 10642 10643 size(0); 10644 format %{ "# castVV of $dst" %} 10645 ins_encode(/* empty encoding */); 10646 ins_cost(0); 10647 ins_pipe(empty); 10648 %} 10649 10650 instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr) 10651 %{ 10652 match(Set dst (IsInfiniteF src)); 10653 effect(TEMP ktmp, KILL cr); 10654 format %{ "float_class_check $dst, $src" %} 10655 ins_encode %{ 10656 __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10657 __ 
kmovbl($dst$$Register, $ktmp$$KRegister); 10658 %} 10659 ins_pipe(pipe_slow); 10660 %} 10661 10662 instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr) 10663 %{ 10664 match(Set dst (IsInfiniteD src)); 10665 effect(TEMP ktmp, KILL cr); 10666 format %{ "double_class_check $dst, $src" %} 10667 ins_encode %{ 10668 __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18); 10669 __ kmovbl($dst$$Register, $ktmp$$KRegister); 10670 %} 10671 ins_pipe(pipe_slow); 10672 %} 10673 10674 instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2) 10675 %{ 10676 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10677 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned()); 10678 match(Set dst (SaturatingAddV src1 src2)); 10679 match(Set dst (SaturatingSubV src1 src2)); 10680 format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %} 10681 ins_encode %{ 10682 int vlen_enc = vector_length_encoding(this); 10683 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10684 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10685 $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 10686 %} 10687 ins_pipe(pipe_slow); 10688 %} 10689 10690 instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2) 10691 %{ 10692 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && 10693 n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned()); 10694 match(Set dst (SaturatingAddV src1 src2)); 10695 match(Set dst (SaturatingSubV src1 src2)); 10696 format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %} 10697 ins_encode %{ 10698 int vlen_enc = vector_length_encoding(this); 10699 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10700 __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10701 $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 10702 %} 10703 
ins_pipe(pipe_slow); 10704 %} 10705 10706 instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2) 10707 %{ 10708 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10709 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10710 (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl())); 10711 match(Set dst (SaturatingAddV src1 src2)); 10712 match(Set dst (SaturatingSubV src1 src2)); 10713 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2); 10714 format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 10715 ins_encode %{ 10716 int vlen_enc = vector_length_encoding(this); 10717 BasicType elem_bt = Matcher::vector_element_basic_type(this); 10718 __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, 10719 $src1$$XMMRegister, $src2$$XMMRegister, 10720 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 10721 $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc); 10722 %} 10723 ins_pipe(pipe_slow); 10724 %} 10725 10726 instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4) 10727 %{ 10728 predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) && 10729 n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() && 10730 Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl()); 10731 match(Set dst (SaturatingAddV src1 src2)); 10732 match(Set dst (SaturatingSubV src1 src2)); 10733 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4); 10734 format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! 
using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating vector add, non-subword lanes (predicate excludes
// byte/short element types), EVEX form: requires either a full 64-byte
// vector or AVX512VL for the shorter lengths. Needs two XMM temporaries
// plus an opmask register (ktmp); dst is also clobbered as a TEMP.
instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating vector add, non-subword lanes, AVX fallback for
// <= 32-byte vectors when AVX512VL is not available (mutually exclusive
// with the EVEX rule above). Uses three XMM temporaries instead of an
// opmask register.
instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating vector subtract, non-subword lanes, EVEX form.
// Only an opmask temporary is needed here (no XMM temps, and dst is not
// a TEMP — unlike the add variant above).
instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned saturating vector subtract, non-subword lanes, AVX fallback
// (<= 32-byte vectors, no AVX512VL). Two XMM temporaries.
instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Signed saturating add/sub, subword (byte/short) lanes, with the second
// operand folded in from memory. One rule handles both Add and Sub; the
// actual opcode is recovered via this->ideal_Opcode() in the encoding.
// The 'false' flag selects the signed path (cf. the unsigned twin below,
// which passes 'true').
instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Unsigned counterpart of the rule above — identical shape, but the
// predicate requires is_unsigned() and the encoding passes 'true'.
instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Opmask-predicated signed saturating add/sub, subword lanes, reg-reg.
// dst appears as both the first input and the destination of the
// (Binary dst src) match, so masked-off lanes keep dst's prior value
// (the encoding's trailing 'true' presumably selects merge-masking —
// confirm against evmasked_saturating_op's signature).
instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Opmask-predicated unsigned saturating add/sub, subword lanes, reg-reg.
// Same shape as the signed rule; first bool flag flips to 'true'.
instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Opmask-predicated signed saturating add/sub, subword lanes, with the
// second operand loaded from memory.
instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Opmask-predicated unsigned saturating add/sub, subword lanes, memory
// operand form.
instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Select elements from two source vectors according to an index vector.
// Note the match tree: the index operand is also the destination
// (Set index ...), so the index vector is overwritten with the result.
instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

// Reinterpret a short held in a GPR as a half-float: vmovw copies the
// low 16 bits of the integer register into an XMM register.
instruct reinterpretS2HF(regF dst, rRegI src)
%{
  match(Set dst (ReinterpretS2HF src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe(pipe_slow);
%}

// Fused float->half conversion + reinterpret: a ConvF2HF feeding a
// ReinterpretS2HF collapses to a single vcvtps2ph (imm8 0x04 supplies
// the rounding-control bits of the instruction).
instruct convF2HFAndS2HF(regF dst, regF src)
%{
  match(Set dst (ReinterpretS2HF (ConvF2HF src)));
  format %{ "convF2HFAndS2HF $dst, $src" %}
  ins_encode %{
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

// Fused reinterpret + half->float conversion: ReinterpretHF2S feeding a
// ConvHF2F collapses to a single vcvtph2ps.
instruct convHF2SAndHF2F(regF dst, regF src)
%{
  match(Set dst (ConvHF2F (ReinterpretHF2S src)));
  format %{ "convHF2SAndHF2F $dst, $src" %}
  ins_encode %{
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe(pipe_slow);
%}

// Reverse of reinterpretS2HF: move the 16-bit half-float pattern from an
// XMM register into a GPR.
instruct reinterpretHF2S(rRegI dst, regF src)
%{
  match(Set dst (ReinterpretHF2S src));
  format %{ "vmovw $dst, $src" %}
  ins_encode %{
    __ vmovw($dst$$Register, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar half-precision square root (vsqrtsh).
instruct scalar_sqrt_HF_reg(regF dst, regF src)
%{
  match(Set dst (SqrtHF src));
  format %{ "scalar_sqrt_fp16 $dst, $src" %}
  ins_encode %{
    __ vsqrtsh($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar half-precision two-operand arithmetic: one rule covers
// add/div/max/min/mul/sub; the ideal opcode is passed to the efp16sh
// helper, which emits the matching FP16 instruction.
instruct scalar_binOps_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (AddHF src1 src2));
  match(Set dst (DivHF src1 src2));
  match(Set dst (MaxHF src1 src2));
  match(Set dst (MinHF src1 src2));
  match(Set dst (MulHF src1 src2));
  match(Set dst (SubHF src1 src2));
  format %{ "scalar_binop_fp16 $dst, $src1, $src2" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ efp16sh(opcode, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// Scalar half-precision fused multiply-add: dst = dst * src1 + src2
// (see format string), mapped onto vfmadd132sh's operand order.
instruct scalar_fma_HF_reg(regF dst, regF src1, regF src2)
%{
  match(Set dst (FmaHF src2 (Binary dst src1)));
  effect(DEF dst);
  format %{ "scalar_fma_fp16 $dst, $src1, $src2\t# $dst = $dst * $src1 + $src2 fma packedH" %}
  ins_encode %{
    __ vfmadd132sh($dst$$XMMRegister, $src2$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}