1 // 2 // Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved. 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 // 5 // This code is free software; you can redistribute it and/or modify it 6 // under the terms of the GNU General Public License version 2 only, as 7 // published by the Free Software Foundation. 8 // 9 // This code is distributed in the hope that it will be useful, but WITHOUT 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 // version 2 for more details (a copy is included in the LICENSE file that 13 // accompanied this code). 14 // 15 // You should have received a copy of the GNU General Public License version 16 // 2 along with this work; if not, write to the Free Software Foundation, 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 // 19 // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 // or visit www.oracle.com if you need additional information or have any 21 // questions. 22 // 23 // 24 25 // X86 Common Architecture Description File 26 27 //----------REGISTER DEFINITION BLOCK------------------------------------------ 28 // This information is used by the matcher and the register allocator to 29 // describe individual registers and classes of registers within the target 30 // architecture. 31 32 register %{ 33 //----------Architecture Description Register Definitions---------------------- 34 // General Registers 35 // "reg_def" name ( register save type, C convention save type, 36 // ideal register type, encoding ); 37 // Register Save Types: 38 // 39 // NS = No-Save: The register allocator assumes that these registers 40 // can be used without saving upon entry to the method, & 41 // that they do not need to be saved at call sites. 42 // 43 // SOC = Save-On-Call: The register allocator assumes that these registers 44 // can be used without saving upon entry to the method, 45 // but that they must be saved at call sites. 46 // 47 // SOE = Save-On-Entry: The register allocator assumes that these registers 48 // must be saved before using them upon entry to the 49 // method, but they do not need to be saved at call 50 // sites. 51 // 52 // AS = Always-Save: The register allocator assumes that these registers 53 // must be saved before using them upon entry to the 54 // method, & that they must be saved at call sites. 55 // 56 // Ideal Register Type is used to determine how to save & restore a 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI. 59 // 60 // The encoding number is the actual bit-pattern placed into the opcodes. 61 62 // XMM registers. 512-bit registers or 8 words each, labeled (a)-p. 63 // Word a in each register holds a Float, words ab hold a Double. 64 // The whole registers are used in SSE4.2 version intrinsics, 65 // array copy stubs and superword operations (see UseSSE42Intrinsics, 66 // UseXMMForArrayCopy and UseSuperword flags). 67 // For pre EVEX enabled architectures: 68 // XMM8-XMM15 must be encoded with REX (VEX for UseAVX) 69 // For EVEX enabled architectures: 70 // XMM8-XMM31 must be encoded with REX (EVEX for UseAVX). 
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 
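  // The call below pushes the address of the "next" label as its return address;
  // the subptr that follows subtracts the number of bytes emitted between
  // "the_pc" and "next", leaving "the_pc" itself on the stack for the deopt blob.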
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 
if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 
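// For example, a vector of 16 floats (vlen == 16, i.e. 512 bits) is handled by the
// Op_AbsVF/Op_NegVF cases below, which additionally require AVX512DQ because the
// 512-bit vandps/vxorps forms are not available otherwise.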
1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_UMinV: 1769 case Op_UMaxV: 1770 if (UseAVX == 0) { 1771 return false; 1772 } 1773 break; 1774 case Op_MaxV: 1775 case Op_MinV: 1776 if (UseSSE < 4 && is_integral_type(bt)) { 1777 return false; 1778 } 1779 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1780 // Float/Double intrinsics are enabled for AVX family currently. 
1781 if (UseAVX == 0) { 1782 return false; 1783 } 1784 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1785 return false; 1786 } 1787 } 1788 break; 1789 case Op_CallLeafVector: 1790 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1791 return false; 1792 } 1793 break; 1794 case Op_AddReductionVI: 1795 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1796 return false; 1797 } 1798 // fallthrough 1799 case Op_AndReductionV: 1800 case Op_OrReductionV: 1801 case Op_XorReductionV: 1802 if (is_subword_type(bt) && (UseSSE < 4)) { 1803 return false; 1804 } 1805 #ifndef _LP64 1806 if (bt == T_BYTE || bt == T_LONG) { 1807 return false; 1808 } 1809 #endif 1810 break; 1811 #ifndef _LP64 1812 case Op_VectorInsert: 1813 if (bt == T_LONG || bt == T_DOUBLE) { 1814 return false; 1815 } 1816 break; 1817 #endif 1818 case Op_MinReductionV: 1819 case Op_MaxReductionV: 1820 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1821 return false; 1822 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1823 return false; 1824 } 1825 // Float/Double intrinsics enabled for AVX family. 1826 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1827 return false; 1828 } 1829 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1830 return false; 1831 } 1832 #ifndef _LP64 1833 if (bt == T_BYTE || bt == T_LONG) { 1834 return false; 1835 } 1836 #endif 1837 break; 1838 case Op_VectorTest: 1839 if (UseSSE < 4) { 1840 return false; // Implementation limitation 1841 } else if (size_in_bits < 32) { 1842 return false; // Implementation limitation 1843 } 1844 break; 1845 case Op_VectorLoadShuffle: 1846 case Op_VectorRearrange: 1847 if(vlen == 2) { 1848 return false; // Implementation limitation due to how shuffle is loaded 1849 } else if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 break; 1853 case Op_VectorLoadMask: 1854 case Op_VectorMaskCast: 1855 if (size_in_bits == 256 && UseAVX < 2) { 1856 return false; // Implementation limitation 1857 } 1858 // fallthrough 1859 case Op_VectorStoreMask: 1860 if (vlen == 2) { 1861 return false; // Implementation limitation 1862 } 1863 break; 1864 case Op_PopulateIndex: 1865 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1866 return false; 1867 } 1868 break; 1869 case Op_VectorCastB2X: 1870 case Op_VectorCastS2X: 1871 case Op_VectorCastI2X: 1872 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1873 return false; 1874 } 1875 break; 1876 case Op_VectorCastL2X: 1877 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1878 return false; 1879 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1880 return false; 1881 } 1882 break; 1883 case Op_VectorCastF2X: { 1884 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1885 // happen after intermediate conversion to integer and special handling 1886 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
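      // For instance, casting 8 floats to an integral type gives
      // src_size_in_bits = 4 * 8 * 8 = 256, which is rejected below when AVX2 is absent.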
1887 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1888 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1889 return false; 1890 } 1891 } 1892 // fallthrough 1893 case Op_VectorCastD2X: 1894 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1895 return false; 1896 } 1897 break; 1898 case Op_VectorCastF2HF: 1899 case Op_VectorCastHF2F: 1900 if (!VM_Version::supports_f16c() && 1901 ((!VM_Version::supports_evex() || 1902 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1903 return false; 1904 } 1905 break; 1906 case Op_RoundVD: 1907 if (!VM_Version::supports_avx512dq()) { 1908 return false; 1909 } 1910 break; 1911 case Op_MulReductionVI: 1912 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1913 return false; 1914 } 1915 break; 1916 case Op_LoadVectorGatherMasked: 1917 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1918 return false; 1919 } 1920 if (is_subword_type(bt) && 1921 (!is_LP64 || 1922 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1923 (size_in_bits < 64) || 1924 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1925 return false; 1926 } 1927 break; 1928 case Op_StoreVectorScatterMasked: 1929 case Op_StoreVectorScatter: 1930 if (is_subword_type(bt)) { 1931 return false; 1932 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1933 return false; 1934 } 1935 // fallthrough 1936 case Op_LoadVectorGather: 1937 if (!is_subword_type(bt) && size_in_bits == 64) { 1938 return false; 1939 } 1940 if (is_subword_type(bt) && size_in_bits < 64) { 1941 return false; 1942 } 1943 break; 1944 case Op_SaturatingAddV: 1945 case Op_SaturatingSubV: 1946 if (UseAVX < 1) { 1947 return false; // Implementation limitation 1948 } 1949 if (is_subword_type(bt) && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1950 return false; 1951 } 1952 break; 1953 case Op_SelectFromTwoVector: 1954 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1955 return false; 1956 } 1957 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1958 return false; 1959 } 1960 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1961 return false; 1962 } 1963 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1964 return false; 1965 } 1966 break; 1967 case Op_MaskAll: 1968 if (!VM_Version::supports_evex()) { 1969 return false; 1970 } 1971 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1972 return false; 1973 } 1974 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1975 return false; 1976 } 1977 break; 1978 case Op_VectorMaskCmp: 1979 if (vlen < 2 || size_in_bits < 32) { 1980 return false; 1981 } 1982 break; 1983 case Op_CompressM: 1984 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1985 return false; 1986 } 1987 break; 1988 case Op_CompressV: 1989 case Op_ExpandV: 1990 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1991 return false; 1992 } 1993 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1994 return false; 1995 } 1996 if (size_in_bits < 128 ) { 1997 return false; 1998 } 1999 case Op_VectorLongToMask: 2000 if (UseAVX < 1 || !is_LP64) { 2001 return false; 2002 } 2003 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 2004 return false; 2005 } 2006 break; 2007 case Op_SignumVD: 2008 case Op_SignumVF: 2009 if (UseAVX < 1) { 2010 return false; 2011 } 2012 break; 2013 case Op_PopCountVI: 2014 
case Op_PopCountVL: { 2015 if (!is_pop_count_instr_target(bt) && 2016 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2017 return false; 2018 } 2019 } 2020 break; 2021 case Op_ReverseV: 2022 case Op_ReverseBytesV: 2023 if (UseAVX < 2) { 2024 return false; 2025 } 2026 break; 2027 case Op_CountTrailingZerosV: 2028 case Op_CountLeadingZerosV: 2029 if (UseAVX < 2) { 2030 return false; 2031 } 2032 break; 2033 } 2034 return true; // Per default match rules are supported. 2035 } 2036 2037 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2038 // ADLC based match_rule_supported routine checks for the existence of pattern based 2039 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2040 // of their non-masked counterpart with mask edge being the differentiator. 2041 // This routine does a strict check on the existence of masked operation patterns 2042 // by returning a default false value for all the other opcodes apart from the 2043 // ones whose masked instruction patterns are defined in this file. 2044 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2045 return false; 2046 } 2047 2048 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2049 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2050 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2051 return false; 2052 } 2053 switch(opcode) { 2054 // Unary masked operations 2055 case Op_AbsVB: 2056 case Op_AbsVS: 2057 if(!VM_Version::supports_avx512bw()) { 2058 return false; // Implementation limitation 2059 } 2060 case Op_AbsVI: 2061 case Op_AbsVL: 2062 return true; 2063 2064 // Ternary masked operations 2065 case Op_FmaVF: 2066 case Op_FmaVD: 2067 return true; 2068 2069 case Op_MacroLogicV: 2070 if(bt != T_INT && bt != T_LONG) { 2071 return false; 2072 } 2073 return true; 2074 2075 // Binary masked operations 2076 case Op_AddVB: 2077 case Op_AddVS: 2078 case Op_SubVB: 2079 case Op_SubVS: 2080 case Op_MulVS: 2081 case Op_LShiftVS: 2082 case Op_RShiftVS: 2083 case Op_URShiftVS: 2084 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2085 if (!VM_Version::supports_avx512bw()) { 2086 return false; // Implementation limitation 2087 } 2088 return true; 2089 2090 case Op_MulVL: 2091 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2092 if (!VM_Version::supports_avx512dq()) { 2093 return false; // Implementation limitation 2094 } 2095 return true; 2096 2097 case Op_AndV: 2098 case Op_OrV: 2099 case Op_XorV: 2100 case Op_RotateRightV: 2101 case Op_RotateLeftV: 2102 if (bt != T_INT && bt != T_LONG) { 2103 return false; // Implementation limitation 2104 } 2105 return true; 2106 2107 case Op_VectorLoadMask: 2108 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2109 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2110 return false; 2111 } 2112 return true; 2113 2114 case Op_AddVI: 2115 case Op_AddVL: 2116 case Op_AddVF: 2117 case Op_AddVD: 2118 case Op_SubVI: 2119 case Op_SubVL: 2120 case Op_SubVF: 2121 case Op_SubVD: 2122 case Op_MulVI: 2123 case Op_MulVF: 2124 case Op_MulVD: 2125 case Op_DivVF: 2126 case Op_DivVD: 2127 case Op_SqrtVF: 2128 case Op_SqrtVD: 2129 case Op_LShiftVI: 2130 case Op_LShiftVL: 2131 case Op_RShiftVI: 2132 case Op_RShiftVL: 2133 case Op_URShiftVI: 2134 case Op_URShiftVL: 2135 case Op_LoadVectorMasked: 2136 case Op_StoreVectorMasked: 2137 case Op_LoadVectorGatherMasked: 2138 case Op_StoreVectorScatterMasked: 2139 return true; 2140 2141 case Op_UMinV: 
2142 case Op_UMaxV: 2143 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 2144 return false; 2145 } // fallthrough 2146 case Op_MaxV: 2147 case Op_MinV: 2148 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2149 return false; // Implementation limitation 2150 } 2151 if (is_floating_point_type(bt)) { 2152 return false; // Implementation limitation 2153 } 2154 return true; 2155 case Op_SaturatingAddV: 2156 case Op_SaturatingSubV: 2157 if (!is_subword_type(bt)) { 2158 return false; 2159 } 2160 if (size_in_bits < 128 || !VM_Version::supports_avx512bw()) { 2161 return false; // Implementation limitation 2162 } 2163 return true; 2164 2165 case Op_VectorMaskCmp: 2166 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2167 return false; // Implementation limitation 2168 } 2169 return true; 2170 2171 case Op_VectorRearrange: 2172 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2173 return false; // Implementation limitation 2174 } 2175 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2176 return false; // Implementation limitation 2177 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2178 return false; // Implementation limitation 2179 } 2180 return true; 2181 2182 // Binary Logical operations 2183 case Op_AndVMask: 2184 case Op_OrVMask: 2185 case Op_XorVMask: 2186 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2187 return false; // Implementation limitation 2188 } 2189 return true; 2190 2191 case Op_PopCountVI: 2192 case Op_PopCountVL: 2193 if (!is_pop_count_instr_target(bt)) { 2194 return false; 2195 } 2196 return true; 2197 2198 case Op_MaskAll: 2199 return true; 2200 2201 case Op_CountLeadingZerosV: 2202 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2203 return true; 2204 } 2205 default: 2206 return false; 2207 } 2208 } 2209 2210 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2211 return false; 2212 } 2213 2214 // Return true if Vector::rearrange needs preparation of the shuffle argument 2215 bool Matcher::vector_rearrange_requires_load_shuffle(BasicType elem_bt, int vlen) { 2216 switch (elem_bt) { 2217 case T_BYTE: return false; 2218 case T_SHORT: return !VM_Version::supports_avx512bw(); 2219 case T_INT: return !VM_Version::supports_avx(); 2220 case T_LONG: return vlen < 8 && !VM_Version::supports_avx512vl(); 2221 default: 2222 ShouldNotReachHere(); 2223 return false; 2224 } 2225 } 2226 2227 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2228 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2229 bool legacy = (generic_opnd->opcode() == LEGVEC); 2230 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2231 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2232 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
2233 return new legVecZOper(); 2234 } 2235 if (legacy) { 2236 switch (ideal_reg) { 2237 case Op_VecS: return new legVecSOper(); 2238 case Op_VecD: return new legVecDOper(); 2239 case Op_VecX: return new legVecXOper(); 2240 case Op_VecY: return new legVecYOper(); 2241 case Op_VecZ: return new legVecZOper(); 2242 } 2243 } else { 2244 switch (ideal_reg) { 2245 case Op_VecS: return new vecSOper(); 2246 case Op_VecD: return new vecDOper(); 2247 case Op_VecX: return new vecXOper(); 2248 case Op_VecY: return new vecYOper(); 2249 case Op_VecZ: return new vecZOper(); 2250 } 2251 } 2252 ShouldNotReachHere(); 2253 return nullptr; 2254 } 2255 2256 bool Matcher::is_reg2reg_move(MachNode* m) { 2257 switch (m->rule()) { 2258 case MoveVec2Leg_rule: 2259 case MoveLeg2Vec_rule: 2260 case MoveF2VL_rule: 2261 case MoveF2LEG_rule: 2262 case MoveVL2F_rule: 2263 case MoveLEG2F_rule: 2264 case MoveD2VL_rule: 2265 case MoveD2LEG_rule: 2266 case MoveVL2D_rule: 2267 case MoveLEG2D_rule: 2268 return true; 2269 default: 2270 return false; 2271 } 2272 } 2273 2274 bool Matcher::is_generic_vector(MachOper* opnd) { 2275 switch (opnd->opcode()) { 2276 case VEC: 2277 case LEGVEC: 2278 return true; 2279 default: 2280 return false; 2281 } 2282 } 2283 2284 //------------------------------------------------------------------------ 2285 2286 const RegMask* Matcher::predicate_reg_mask(void) { 2287 return &_VECTMASK_REG_mask; 2288 } 2289 2290 // Max vector size in bytes. 0 if not supported. 2291 int Matcher::vector_width_in_bytes(BasicType bt) { 2292 assert(is_java_primitive(bt), "only primitive type vectors"); 2293 if (UseSSE < 2) return 0; 2294 // SSE2 supports 128bit vectors for all types. 2295 // AVX2 supports 256bit vectors for all types. 2296 // AVX2/EVEX supports 512bit vectors for all types. 2297 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2298 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2299 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2300 size = (UseAVX > 2) ? 64 : 32; 2301 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2302 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2303 // Use flag to limit vector size. 2304 size = MIN2(size,(int)MaxVectorSize); 2305 // Minimum 2 values in vector (or 4 for bytes). 2306 switch (bt) { 2307 case T_DOUBLE: 2308 case T_LONG: 2309 if (size < 16) return 0; 2310 break; 2311 case T_FLOAT: 2312 case T_INT: 2313 if (size < 8) return 0; 2314 break; 2315 case T_BOOLEAN: 2316 if (size < 4) return 0; 2317 break; 2318 case T_CHAR: 2319 if (size < 4) return 0; 2320 break; 2321 case T_BYTE: 2322 if (size < 4) return 0; 2323 break; 2324 case T_SHORT: 2325 if (size < 4) return 0; 2326 break; 2327 default: 2328 ShouldNotReachHere(); 2329 } 2330 return size; 2331 } 2332 2333 // Limits on vector size (number of elements) loaded into vector. 2334 int Matcher::max_vector_size(const BasicType bt) { 2335 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2336 } 2337 int Matcher::min_vector_size(const BasicType bt) { 2338 int max_size = max_vector_size(bt); 2339 // Min size which can be loaded into vector is 4 bytes. 2340 int size = (type2aelembytes(bt) == 1) ? 
               4 : 2;
  // Support for calling svml double64 vectors
  if (bt == T_DOUBLE) {
    size = 1;
  }
  return MIN2(size,max_size);
}

int Matcher::max_vector_size_auto_vectorization(const BasicType bt) {
  // Limit the max vector size for auto vectorization to 256 bits (32 bytes)
  // by default on Cascade Lake
  if (VM_Version::is_default_intel_cascade_lake()) {
    return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt));
  }
  return Matcher::max_vector_size(bt);
}

int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}

// Vector ideal reg corresponding to specified size in bytes
uint Matcher::vector_ideal_reg(int size) {
  assert(MaxVectorSize >= size, "");
  switch(size) {
    case  4: return Op_VecS;
    case  8: return Op_VecD;
    case 16: return Op_VecX;
    case 32: return Op_VecY;
    case 64: return Op_VecZ;
  }
  ShouldNotReachHere();
  return 0;
}

// Check for shift by small constant as well
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
  if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
      shift->in(2)->get_int() <= 3 &&
      // Are there other uses besides address expressions?
      !matcher->is_visited(shift)) {
    address_visited.set(shift->_idx); // Flag as address_visited
    mstack.push(shift->in(2), Matcher::Visit);
    Node *conv = shift->in(1);
#ifdef _LP64
    // Allow Matcher to match the rule which bypasses
    // the ConvI2L operation for an array index on LP64
    // if the index value is positive.
    if (conv->Opcode() == Op_ConvI2L &&
        conv->as_Type()->type()->is_long()->_lo >= 0 &&
        // Are there other uses besides address expressions?
        !matcher->is_visited(conv)) {
      address_visited.set(conv->_idx); // Flag as address_visited
      mstack.push(conv->in(1), Matcher::Pre_Visit);
    } else
#endif
      mstack.push(conv, Matcher::Pre_Visit);
    return true;
  }
  return false;
}

// This function identifies sub-graphs in which a 'load' node is
// input to two different nodes, such that the pair can be matched
// with BMI instructions like blsi, blsr, etc.
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
// refers to the same node.
//
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
// This is a temporary solution until we make DAGs expressible in ADL.
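// For illustration, the 32-bit blsi form above corresponds to is_bmi_pattern() below
// calling bmii.match(Op_AndI, -1, Op_SubI, 1, 0): an AndI whose inputs are
// (SubI (ConI 0) LoadI*) and the same LoadI* node.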
2411 template<typename ConType> 2412 class FusedPatternMatcher { 2413 Node* _op1_node; 2414 Node* _mop_node; 2415 int _con_op; 2416 2417 static int match_next(Node* n, int next_op, int next_op_idx) { 2418 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2419 return -1; 2420 } 2421 2422 if (next_op_idx == -1) { // n is commutative, try rotations 2423 if (n->in(1)->Opcode() == next_op) { 2424 return 1; 2425 } else if (n->in(2)->Opcode() == next_op) { 2426 return 2; 2427 } 2428 } else { 2429 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2430 if (n->in(next_op_idx)->Opcode() == next_op) { 2431 return next_op_idx; 2432 } 2433 } 2434 return -1; 2435 } 2436 2437 public: 2438 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2439 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2440 2441 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2442 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2443 typename ConType::NativeType con_value) { 2444 if (_op1_node->Opcode() != op1) { 2445 return false; 2446 } 2447 if (_mop_node->outcnt() > 2) { 2448 return false; 2449 } 2450 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2451 if (op1_op2_idx == -1) { 2452 return false; 2453 } 2454 // Memory operation must be the other edge 2455 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2456 2457 // Check that the mop node is really what we want 2458 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2459 Node* op2_node = _op1_node->in(op1_op2_idx); 2460 if (op2_node->outcnt() > 1) { 2461 return false; 2462 } 2463 assert(op2_node->Opcode() == op2, "Should be"); 2464 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2465 if (op2_con_idx == -1) { 2466 return false; 2467 } 2468 // Memory operation must be the other edge 2469 int op2_mop_idx = (op2_con_idx & 1) + 1; 2470 // Check that the memory operation is the same node 2471 if (op2_node->in(op2_mop_idx) == _mop_node) { 2472 // Now check the constant 2473 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2474 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2475 return true; 2476 } 2477 } 2478 } 2479 return false; 2480 } 2481 }; 2482 2483 static bool is_bmi_pattern(Node* n, Node* m) { 2484 assert(UseBMI1Instructions, "sanity"); 2485 if (n != nullptr && m != nullptr) { 2486 if (m->Opcode() == Op_LoadI) { 2487 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2488 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2489 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2490 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2491 } else if (m->Opcode() == Op_LoadL) { 2492 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2493 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2494 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2495 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2496 } 2497 } 2498 return false; 2499 } 2500 2501 // Should the matcher clone input 'm' of node 'n'? 2502 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2503 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2504 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2505 mstack.push(m, Visit); 2506 return true; 2507 } 2508 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2509 mstack.push(m, Visit); // m = ShiftCntV 2510 return true; 2511 } 2512 if (is_encode_and_store_pattern(n, m)) { 2513 mstack.push(m, Visit); 2514 return true; 2515 } 2516 return false; 2517 } 2518 2519 // Should the Matcher clone shifts on addressing modes, expecting them 2520 // to be subsumed into complex addressing expressions or compute them 2521 // into registers? 2522 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2523 Node *off = m->in(AddPNode::Offset); 2524 if (off->is_Con()) { 2525 address_visited.test_set(m->_idx); // Flag as address_visited 2526 Node *adr = m->in(AddPNode::Address); 2527 2528 // Intel can handle 2 adds in addressing mode, with one of them using an immediate offset. 2529 // AtomicAdd is not an addressing expression. 2530 // Cheap to find it by looking for screwy base. 2531 if (adr->is_AddP() && 2532 !adr->in(AddPNode::Base)->is_top() && 2533 !adr->in(AddPNode::Offset)->is_Con() && 2534 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2535 // Are there other uses besides address expressions? 2536 !is_visited(adr)) { 2537 address_visited.set(adr->_idx); // Flag as address_visited 2538 Node *shift = adr->in(AddPNode::Offset); 2539 if (!clone_shift(shift, this, mstack, address_visited)) { 2540 mstack.push(shift, Pre_Visit); 2541 } 2542 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2543 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2544 } else { 2545 mstack.push(adr, Pre_Visit); 2546 } 2547 2548 // Clone X+offset as it also folds into most addressing expressions 2549 mstack.push(off, Visit); 2550 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2551 return true; 2552 } else if (clone_shift(off, this, mstack, address_visited)) { 2553 address_visited.test_set(m->_idx); // Flag as address_visited 2554 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2555 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2556 return true; 2557 } 2558 return false; 2559 } 2560 2561 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2562 switch (bt) { 2563 case BoolTest::eq: 2564 return Assembler::eq; 2565 case BoolTest::ne: 2566 return Assembler::neq; 2567 case BoolTest::le: 2568 case BoolTest::ule: 2569 return Assembler::le; 2570 case BoolTest::ge: 2571 case BoolTest::uge: 2572 return Assembler::nlt; 2573 case BoolTest::lt: 2574 case BoolTest::ult: 2575 return Assembler::lt; 2576 case BoolTest::gt: 2577 case BoolTest::ugt: 2578 return Assembler::nle; 2579 default : ShouldNotReachHere(); return Assembler::_false; 2580 } 2581 } 2582 2583 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2584 switch (bt) { 2585 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2586 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2587 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2588 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2589 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2590 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2591 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2592 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2593 } 2594 } 2595 2596 // Helper methods for MachSpillCopyNode::implementation(). 2597 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2598 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2599 assert(ireg == Op_VecS || // 32bit vector 2600 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2601 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2602 "no non-adjacent vector moves" ); 2603 if (masm) { 2604 switch (ireg) { 2605 case Op_VecS: // copy whole register 2606 case Op_VecD: 2607 case Op_VecX: 2608 #ifndef _LP64 2609 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2610 #else 2611 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2612 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2613 } else { 2614 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2615 } 2616 #endif 2617 break; 2618 case Op_VecY: 2619 #ifndef _LP64 2620 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2621 #else 2622 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2623 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2624 } else { 2625 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2626 } 2627 #endif 2628 break; 2629 case Op_VecZ: 2630 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2631 break; 2632 default: 2633 ShouldNotReachHere(); 2634 } 2635 #ifndef PRODUCT 2636 } else { 2637 switch (ireg) { 2638 case Op_VecS: 2639 case Op_VecD: 2640 case Op_VecX: 2641 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2642 break; 2643 case Op_VecY: 2644 case Op_VecZ: 2645 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2646 break; 2647 default: 2648 ShouldNotReachHere(); 2649 } 2650 #endif 2651 } 2652 } 2653 2654 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2655 int stack_offset, int reg, uint ireg, outputStream* st) { 2656 if (masm) { 2657 if (is_load) { 2658 switch (ireg) { 2659 case Op_VecS: 2660 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2661 break; 2662 case Op_VecD: 2663 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2664 break; 2665 case Op_VecX: 2666 #ifndef _LP64 2667 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2668 #else 2669 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2670 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2671 } else { 2672 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2673 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2674 } 2675 
#endif 2676 break; 2677 case Op_VecY: 2678 #ifndef _LP64 2679 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2680 #else 2681 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2682 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2683 } else { 2684 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2685 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2686 } 2687 #endif 2688 break; 2689 case Op_VecZ: 2690 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2691 break; 2692 default: 2693 ShouldNotReachHere(); 2694 } 2695 } else { // store 2696 switch (ireg) { 2697 case Op_VecS: 2698 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2699 break; 2700 case Op_VecD: 2701 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2702 break; 2703 case Op_VecX: 2704 #ifndef _LP64 2705 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2706 #else 2707 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2708 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2709 } 2710 else { 2711 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2712 } 2713 #endif 2714 break; 2715 case Op_VecY: 2716 #ifndef _LP64 2717 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2718 #else 2719 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2720 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2721 } 2722 else { 2723 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2724 } 2725 #endif 2726 break; 2727 case Op_VecZ: 2728 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2729 break; 2730 default: 2731 ShouldNotReachHere(); 2732 } 2733 } 2734 #ifndef PRODUCT 2735 } else { 2736 if (is_load) { 2737 switch (ireg) { 2738 case Op_VecS: 2739 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2740 break; 2741 case Op_VecD: 2742 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2743 break; 2744 case Op_VecX: 2745 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2746 break; 2747 case Op_VecY: 2748 case Op_VecZ: 2749 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2750 break; 2751 default: 2752 ShouldNotReachHere(); 2753 } 2754 } else { // store 2755 switch (ireg) { 2756 case Op_VecS: 2757 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2758 break; 2759 case Op_VecD: 2760 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2761 break; 2762 case Op_VecX: 2763 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2764 break; 2765 case Op_VecY: 2766 case Op_VecZ: 2767 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2768 break; 2769 default: 2770 ShouldNotReachHere(); 2771 } 2772 } 2773 #endif 2774 } 2775 } 2776 2777 template <class T> 2778 static inline GrowableArray<jbyte>* vreplicate_imm(BasicType bt, T con, int len) { 2779 int size = type2aelembytes(bt) * len; 2780 GrowableArray<jbyte>* val = new GrowableArray<jbyte>(size, 
size, 0); 2781 for (int i = 0; i < len; i++) { 2782 int offset = i * type2aelembytes(bt); 2783 switch (bt) { 2784 case T_BYTE: val->at(i) = con; break; 2785 case T_SHORT: { 2786 jshort c = con; 2787 memcpy(val->adr_at(offset), &c, sizeof(jshort)); 2788 break; 2789 } 2790 case T_INT: { 2791 jint c = con; 2792 memcpy(val->adr_at(offset), &c, sizeof(jint)); 2793 break; 2794 } 2795 case T_LONG: { 2796 jlong c = con; 2797 memcpy(val->adr_at(offset), &c, sizeof(jlong)); 2798 break; 2799 } 2800 case T_FLOAT: { 2801 jfloat c = con; 2802 memcpy(val->adr_at(offset), &c, sizeof(jfloat)); 2803 break; 2804 } 2805 case T_DOUBLE: { 2806 jdouble c = con; 2807 memcpy(val->adr_at(offset), &c, sizeof(jdouble)); 2808 break; 2809 } 2810 default: assert(false, "%s", type2name(bt)); 2811 } 2812 } 2813 return val; 2814 } 2815 2816 static inline jlong high_bit_set(BasicType bt) { 2817 switch (bt) { 2818 case T_BYTE: return 0x8080808080808080; 2819 case T_SHORT: return 0x8000800080008000; 2820 case T_INT: return 0x8000000080000000; 2821 case T_LONG: return 0x8000000000000000; 2822 default: 2823 ShouldNotReachHere(); 2824 return 0; 2825 } 2826 } 2827 2828 #ifndef PRODUCT 2829 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2830 st->print("nop \t# %d bytes pad for loops and calls", _count); 2831 } 2832 #endif 2833 2834 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2835 __ nop(_count); 2836 } 2837 2838 uint MachNopNode::size(PhaseRegAlloc*) const { 2839 return _count; 2840 } 2841 2842 #ifndef PRODUCT 2843 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2844 st->print("# breakpoint"); 2845 } 2846 #endif 2847 2848 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2849 __ int3(); 2850 } 2851 2852 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2853 return MachNode::size(ra_); 2854 } 2855 2856 %} 2857 2858 encode %{ 2859 2860 enc_class call_epilog %{ 2861 if (VerifyStackAtCalls) { 2862 // Check that stack depth is unchanged: find majik cookie on stack 2863 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2864 Label L; 2865 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2866 __ jccb(Assembler::equal, L); 2867 // Die if stack mismatch 2868 __ int3(); 2869 __ bind(L); 2870 } 2871 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2872 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2873 // Search for the corresponding projection, get the register and emit code that initialized it. 2874 uint con = (tf()->range_cc()->cnt() - 1); 2875 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2876 ProjNode* proj = fast_out(i)->as_Proj(); 2877 if (proj->_con == con) { 2878 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2879 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2880 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2881 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2882 __ testq(rax, rax); 2883 __ setb(Assembler::notZero, toReg); 2884 __ movzbl(toReg, toReg); 2885 if (reg->is_stack()) { 2886 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2887 __ movq(Address(rsp, st_off), toReg); 2888 } 2889 break; 2890 } 2891 } 2892 if (return_value_is_used()) { 2893 // An inline type is returned as fields in multiple registers. 
2894 // Rax either contains an oop if the inline type is buffered or a pointer 2895 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2896 // if the lowest bit is set to allow C2 to use the oop after null checking. 2897 // rax &= (rax & 1) - 1 2898 __ movptr(rscratch1, rax); 2899 __ andptr(rscratch1, 0x1); 2900 __ subptr(rscratch1, 0x1); 2901 __ andptr(rax, rscratch1); 2902 } 2903 } 2904 %} 2905 2906 %} 2907 2908 // Operands for bound floating pointer register arguments 2909 operand rxmm0() %{ 2910 constraint(ALLOC_IN_RC(xmm0_reg)); 2911 match(VecX); 2912 format%{%} 2913 interface(REG_INTER); 2914 %} 2915 2916 //----------OPERANDS----------------------------------------------------------- 2917 // Operand definitions must precede instruction definitions for correct parsing 2918 // in the ADLC because operands constitute user defined types which are used in 2919 // instruction definitions. 2920 2921 // Vectors 2922 2923 // Dummy generic vector class. Should be used for all vector operands. 2924 // Replaced with vec[SDXYZ] during post-selection pass. 2925 operand vec() %{ 2926 constraint(ALLOC_IN_RC(dynamic)); 2927 match(VecX); 2928 match(VecY); 2929 match(VecZ); 2930 match(VecS); 2931 match(VecD); 2932 2933 format %{ %} 2934 interface(REG_INTER); 2935 %} 2936 2937 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2938 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2939 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2940 // runtime code generation via reg_class_dynamic. 2941 operand legVec() %{ 2942 constraint(ALLOC_IN_RC(dynamic)); 2943 match(VecX); 2944 match(VecY); 2945 match(VecZ); 2946 match(VecS); 2947 match(VecD); 2948 2949 format %{ %} 2950 interface(REG_INTER); 2951 %} 2952 2953 // Replaces vec during post-selection cleanup. See above. 2954 operand vecS() %{ 2955 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2956 match(VecS); 2957 2958 format %{ %} 2959 interface(REG_INTER); 2960 %} 2961 2962 // Replaces legVec during post-selection cleanup. See above. 2963 operand legVecS() %{ 2964 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2965 match(VecS); 2966 2967 format %{ %} 2968 interface(REG_INTER); 2969 %} 2970 2971 // Replaces vec during post-selection cleanup. See above. 2972 operand vecD() %{ 2973 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2974 match(VecD); 2975 2976 format %{ %} 2977 interface(REG_INTER); 2978 %} 2979 2980 // Replaces legVec during post-selection cleanup. See above. 2981 operand legVecD() %{ 2982 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2983 match(VecD); 2984 2985 format %{ %} 2986 interface(REG_INTER); 2987 %} 2988 2989 // Replaces vec during post-selection cleanup. See above. 2990 operand vecX() %{ 2991 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2992 match(VecX); 2993 2994 format %{ %} 2995 interface(REG_INTER); 2996 %} 2997 2998 // Replaces legVec during post-selection cleanup. See above. 2999 operand legVecX() %{ 3000 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 3001 match(VecX); 3002 3003 format %{ %} 3004 interface(REG_INTER); 3005 %} 3006 3007 // Replaces vec during post-selection cleanup. See above. 3008 operand vecY() %{ 3009 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 3010 match(VecY); 3011 3012 format %{ %} 3013 interface(REG_INTER); 3014 %} 3015 3016 // Replaces legVec during post-selection cleanup. See above. 
3017 operand legVecY() %{ 3018 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 3019 match(VecY); 3020 3021 format %{ %} 3022 interface(REG_INTER); 3023 %} 3024 3025 // Replaces vec during post-selection cleanup. See above. 3026 operand vecZ() %{ 3027 constraint(ALLOC_IN_RC(vectorz_reg)); 3028 match(VecZ); 3029 3030 format %{ %} 3031 interface(REG_INTER); 3032 %} 3033 3034 // Replaces legVec during post-selection cleanup. See above. 3035 operand legVecZ() %{ 3036 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 3037 match(VecZ); 3038 3039 format %{ %} 3040 interface(REG_INTER); 3041 %} 3042 3043 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 3044 3045 // ============================================================================ 3046 3047 instruct ShouldNotReachHere() %{ 3048 match(Halt); 3049 format %{ "stop\t# ShouldNotReachHere" %} 3050 ins_encode %{ 3051 if (is_reachable()) { 3052 __ stop(_halt_reason); 3053 } 3054 %} 3055 ins_pipe(pipe_slow); 3056 %} 3057 3058 // ============================================================================ 3059 3060 instruct addF_reg(regF dst, regF src) %{ 3061 predicate((UseSSE>=1) && (UseAVX == 0)); 3062 match(Set dst (AddF dst src)); 3063 3064 format %{ "addss $dst, $src" %} 3065 ins_cost(150); 3066 ins_encode %{ 3067 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3068 %} 3069 ins_pipe(pipe_slow); 3070 %} 3071 3072 instruct addF_mem(regF dst, memory src) %{ 3073 predicate((UseSSE>=1) && (UseAVX == 0)); 3074 match(Set dst (AddF dst (LoadF src))); 3075 3076 format %{ "addss $dst, $src" %} 3077 ins_cost(150); 3078 ins_encode %{ 3079 __ addss($dst$$XMMRegister, $src$$Address); 3080 %} 3081 ins_pipe(pipe_slow); 3082 %} 3083 3084 instruct addF_imm(regF dst, immF con) %{ 3085 predicate((UseSSE>=1) && (UseAVX == 0)); 3086 match(Set dst (AddF dst con)); 3087 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3088 ins_cost(150); 3089 ins_encode %{ 3090 __ addss($dst$$XMMRegister, $constantaddress($con)); 3091 %} 3092 ins_pipe(pipe_slow); 3093 %} 3094 3095 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3096 predicate(UseAVX > 0); 3097 match(Set dst (AddF src1 src2)); 3098 3099 format %{ "vaddss $dst, $src1, $src2" %} 3100 ins_cost(150); 3101 ins_encode %{ 3102 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3103 %} 3104 ins_pipe(pipe_slow); 3105 %} 3106 3107 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3108 predicate(UseAVX > 0); 3109 match(Set dst (AddF src1 (LoadF src2))); 3110 3111 format %{ "vaddss $dst, $src1, $src2" %} 3112 ins_cost(150); 3113 ins_encode %{ 3114 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3115 %} 3116 ins_pipe(pipe_slow); 3117 %} 3118 3119 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3120 predicate(UseAVX > 0); 3121 match(Set dst (AddF src con)); 3122 3123 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3124 ins_cost(150); 3125 ins_encode %{ 3126 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3127 %} 3128 ins_pipe(pipe_slow); 3129 %} 3130 3131 instruct addD_reg(regD dst, regD src) %{ 3132 predicate((UseSSE>=2) && (UseAVX == 0)); 3133 match(Set dst (AddD dst src)); 3134 3135 format %{ "addsd $dst, $src" %} 3136 ins_cost(150); 3137 ins_encode %{ 3138 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3139 %} 3140 ins_pipe(pipe_slow); 3141 %} 3142 3143 instruct addD_mem(regD dst, memory src) %{ 3144 predicate((UseSSE>=2) && (UseAVX == 
0)); 3145 match(Set dst (AddD dst (LoadD src))); 3146 3147 format %{ "addsd $dst, $src" %} 3148 ins_cost(150); 3149 ins_encode %{ 3150 __ addsd($dst$$XMMRegister, $src$$Address); 3151 %} 3152 ins_pipe(pipe_slow); 3153 %} 3154 3155 instruct addD_imm(regD dst, immD con) %{ 3156 predicate((UseSSE>=2) && (UseAVX == 0)); 3157 match(Set dst (AddD dst con)); 3158 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3159 ins_cost(150); 3160 ins_encode %{ 3161 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3162 %} 3163 ins_pipe(pipe_slow); 3164 %} 3165 3166 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3167 predicate(UseAVX > 0); 3168 match(Set dst (AddD src1 src2)); 3169 3170 format %{ "vaddsd $dst, $src1, $src2" %} 3171 ins_cost(150); 3172 ins_encode %{ 3173 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3174 %} 3175 ins_pipe(pipe_slow); 3176 %} 3177 3178 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3179 predicate(UseAVX > 0); 3180 match(Set dst (AddD src1 (LoadD src2))); 3181 3182 format %{ "vaddsd $dst, $src1, $src2" %} 3183 ins_cost(150); 3184 ins_encode %{ 3185 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3186 %} 3187 ins_pipe(pipe_slow); 3188 %} 3189 3190 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3191 predicate(UseAVX > 0); 3192 match(Set dst (AddD src con)); 3193 3194 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3195 ins_cost(150); 3196 ins_encode %{ 3197 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3198 %} 3199 ins_pipe(pipe_slow); 3200 %} 3201 3202 instruct subF_reg(regF dst, regF src) %{ 3203 predicate((UseSSE>=1) && (UseAVX == 0)); 3204 match(Set dst (SubF dst src)); 3205 3206 format %{ "subss $dst, $src" %} 3207 ins_cost(150); 3208 ins_encode %{ 3209 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3210 %} 3211 ins_pipe(pipe_slow); 3212 %} 3213 3214 instruct subF_mem(regF dst, memory src) %{ 3215 predicate((UseSSE>=1) && (UseAVX == 0)); 3216 match(Set dst (SubF dst (LoadF src))); 3217 3218 format %{ "subss $dst, $src" %} 3219 ins_cost(150); 3220 ins_encode %{ 3221 __ subss($dst$$XMMRegister, $src$$Address); 3222 %} 3223 ins_pipe(pipe_slow); 3224 %} 3225 3226 instruct subF_imm(regF dst, immF con) %{ 3227 predicate((UseSSE>=1) && (UseAVX == 0)); 3228 match(Set dst (SubF dst con)); 3229 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3230 ins_cost(150); 3231 ins_encode %{ 3232 __ subss($dst$$XMMRegister, $constantaddress($con)); 3233 %} 3234 ins_pipe(pipe_slow); 3235 %} 3236 3237 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3238 predicate(UseAVX > 0); 3239 match(Set dst (SubF src1 src2)); 3240 3241 format %{ "vsubss $dst, $src1, $src2" %} 3242 ins_cost(150); 3243 ins_encode %{ 3244 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3245 %} 3246 ins_pipe(pipe_slow); 3247 %} 3248 3249 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3250 predicate(UseAVX > 0); 3251 match(Set dst (SubF src1 (LoadF src2))); 3252 3253 format %{ "vsubss $dst, $src1, $src2" %} 3254 ins_cost(150); 3255 ins_encode %{ 3256 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3257 %} 3258 ins_pipe(pipe_slow); 3259 %} 3260 3261 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3262 predicate(UseAVX > 0); 3263 match(Set dst (SubF src con)); 3264 3265 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: 
float=$con" %} 3266 ins_cost(150); 3267 ins_encode %{ 3268 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3269 %} 3270 ins_pipe(pipe_slow); 3271 %} 3272 3273 instruct subD_reg(regD dst, regD src) %{ 3274 predicate((UseSSE>=2) && (UseAVX == 0)); 3275 match(Set dst (SubD dst src)); 3276 3277 format %{ "subsd $dst, $src" %} 3278 ins_cost(150); 3279 ins_encode %{ 3280 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3281 %} 3282 ins_pipe(pipe_slow); 3283 %} 3284 3285 instruct subD_mem(regD dst, memory src) %{ 3286 predicate((UseSSE>=2) && (UseAVX == 0)); 3287 match(Set dst (SubD dst (LoadD src))); 3288 3289 format %{ "subsd $dst, $src" %} 3290 ins_cost(150); 3291 ins_encode %{ 3292 __ subsd($dst$$XMMRegister, $src$$Address); 3293 %} 3294 ins_pipe(pipe_slow); 3295 %} 3296 3297 instruct subD_imm(regD dst, immD con) %{ 3298 predicate((UseSSE>=2) && (UseAVX == 0)); 3299 match(Set dst (SubD dst con)); 3300 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3301 ins_cost(150); 3302 ins_encode %{ 3303 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3304 %} 3305 ins_pipe(pipe_slow); 3306 %} 3307 3308 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3309 predicate(UseAVX > 0); 3310 match(Set dst (SubD src1 src2)); 3311 3312 format %{ "vsubsd $dst, $src1, $src2" %} 3313 ins_cost(150); 3314 ins_encode %{ 3315 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3316 %} 3317 ins_pipe(pipe_slow); 3318 %} 3319 3320 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3321 predicate(UseAVX > 0); 3322 match(Set dst (SubD src1 (LoadD src2))); 3323 3324 format %{ "vsubsd $dst, $src1, $src2" %} 3325 ins_cost(150); 3326 ins_encode %{ 3327 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3328 %} 3329 ins_pipe(pipe_slow); 3330 %} 3331 3332 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3333 predicate(UseAVX > 0); 3334 match(Set dst (SubD src con)); 3335 3336 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3337 ins_cost(150); 3338 ins_encode %{ 3339 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3340 %} 3341 ins_pipe(pipe_slow); 3342 %} 3343 3344 instruct mulF_reg(regF dst, regF src) %{ 3345 predicate((UseSSE>=1) && (UseAVX == 0)); 3346 match(Set dst (MulF dst src)); 3347 3348 format %{ "mulss $dst, $src" %} 3349 ins_cost(150); 3350 ins_encode %{ 3351 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3352 %} 3353 ins_pipe(pipe_slow); 3354 %} 3355 3356 instruct mulF_mem(regF dst, memory src) %{ 3357 predicate((UseSSE>=1) && (UseAVX == 0)); 3358 match(Set dst (MulF dst (LoadF src))); 3359 3360 format %{ "mulss $dst, $src" %} 3361 ins_cost(150); 3362 ins_encode %{ 3363 __ mulss($dst$$XMMRegister, $src$$Address); 3364 %} 3365 ins_pipe(pipe_slow); 3366 %} 3367 3368 instruct mulF_imm(regF dst, immF con) %{ 3369 predicate((UseSSE>=1) && (UseAVX == 0)); 3370 match(Set dst (MulF dst con)); 3371 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3372 ins_cost(150); 3373 ins_encode %{ 3374 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3375 %} 3376 ins_pipe(pipe_slow); 3377 %} 3378 3379 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3380 predicate(UseAVX > 0); 3381 match(Set dst (MulF src1 src2)); 3382 3383 format %{ "vmulss $dst, $src1, $src2" %} 3384 ins_cost(150); 3385 ins_encode %{ 3386 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3387 %} 3388 
ins_pipe(pipe_slow); 3389 %} 3390 3391 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3392 predicate(UseAVX > 0); 3393 match(Set dst (MulF src1 (LoadF src2))); 3394 3395 format %{ "vmulss $dst, $src1, $src2" %} 3396 ins_cost(150); 3397 ins_encode %{ 3398 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3399 %} 3400 ins_pipe(pipe_slow); 3401 %} 3402 3403 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3404 predicate(UseAVX > 0); 3405 match(Set dst (MulF src con)); 3406 3407 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3408 ins_cost(150); 3409 ins_encode %{ 3410 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3411 %} 3412 ins_pipe(pipe_slow); 3413 %} 3414 3415 instruct mulD_reg(regD dst, regD src) %{ 3416 predicate((UseSSE>=2) && (UseAVX == 0)); 3417 match(Set dst (MulD dst src)); 3418 3419 format %{ "mulsd $dst, $src" %} 3420 ins_cost(150); 3421 ins_encode %{ 3422 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3423 %} 3424 ins_pipe(pipe_slow); 3425 %} 3426 3427 instruct mulD_mem(regD dst, memory src) %{ 3428 predicate((UseSSE>=2) && (UseAVX == 0)); 3429 match(Set dst (MulD dst (LoadD src))); 3430 3431 format %{ "mulsd $dst, $src" %} 3432 ins_cost(150); 3433 ins_encode %{ 3434 __ mulsd($dst$$XMMRegister, $src$$Address); 3435 %} 3436 ins_pipe(pipe_slow); 3437 %} 3438 3439 instruct mulD_imm(regD dst, immD con) %{ 3440 predicate((UseSSE>=2) && (UseAVX == 0)); 3441 match(Set dst (MulD dst con)); 3442 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3443 ins_cost(150); 3444 ins_encode %{ 3445 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3446 %} 3447 ins_pipe(pipe_slow); 3448 %} 3449 3450 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3451 predicate(UseAVX > 0); 3452 match(Set dst (MulD src1 src2)); 3453 3454 format %{ "vmulsd $dst, $src1, $src2" %} 3455 ins_cost(150); 3456 ins_encode %{ 3457 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3458 %} 3459 ins_pipe(pipe_slow); 3460 %} 3461 3462 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3463 predicate(UseAVX > 0); 3464 match(Set dst (MulD src1 (LoadD src2))); 3465 3466 format %{ "vmulsd $dst, $src1, $src2" %} 3467 ins_cost(150); 3468 ins_encode %{ 3469 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3470 %} 3471 ins_pipe(pipe_slow); 3472 %} 3473 3474 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3475 predicate(UseAVX > 0); 3476 match(Set dst (MulD src con)); 3477 3478 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3479 ins_cost(150); 3480 ins_encode %{ 3481 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3482 %} 3483 ins_pipe(pipe_slow); 3484 %} 3485 3486 instruct divF_reg(regF dst, regF src) %{ 3487 predicate((UseSSE>=1) && (UseAVX == 0)); 3488 match(Set dst (DivF dst src)); 3489 3490 format %{ "divss $dst, $src" %} 3491 ins_cost(150); 3492 ins_encode %{ 3493 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3494 %} 3495 ins_pipe(pipe_slow); 3496 %} 3497 3498 instruct divF_mem(regF dst, memory src) %{ 3499 predicate((UseSSE>=1) && (UseAVX == 0)); 3500 match(Set dst (DivF dst (LoadF src))); 3501 3502 format %{ "divss $dst, $src" %} 3503 ins_cost(150); 3504 ins_encode %{ 3505 __ divss($dst$$XMMRegister, $src$$Address); 3506 %} 3507 ins_pipe(pipe_slow); 3508 %} 3509 3510 instruct divF_imm(regF dst, immF con) %{ 3511 predicate((UseSSE>=1) && (UseAVX == 0)); 
3512 match(Set dst (DivF dst con)); 3513 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3514 ins_cost(150); 3515 ins_encode %{ 3516 __ divss($dst$$XMMRegister, $constantaddress($con)); 3517 %} 3518 ins_pipe(pipe_slow); 3519 %} 3520 3521 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3522 predicate(UseAVX > 0); 3523 match(Set dst (DivF src1 src2)); 3524 3525 format %{ "vdivss $dst, $src1, $src2" %} 3526 ins_cost(150); 3527 ins_encode %{ 3528 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3529 %} 3530 ins_pipe(pipe_slow); 3531 %} 3532 3533 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3534 predicate(UseAVX > 0); 3535 match(Set dst (DivF src1 (LoadF src2))); 3536 3537 format %{ "vdivss $dst, $src1, $src2" %} 3538 ins_cost(150); 3539 ins_encode %{ 3540 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3541 %} 3542 ins_pipe(pipe_slow); 3543 %} 3544 3545 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3546 predicate(UseAVX > 0); 3547 match(Set dst (DivF src con)); 3548 3549 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3550 ins_cost(150); 3551 ins_encode %{ 3552 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3553 %} 3554 ins_pipe(pipe_slow); 3555 %} 3556 3557 instruct divD_reg(regD dst, regD src) %{ 3558 predicate((UseSSE>=2) && (UseAVX == 0)); 3559 match(Set dst (DivD dst src)); 3560 3561 format %{ "divsd $dst, $src" %} 3562 ins_cost(150); 3563 ins_encode %{ 3564 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3565 %} 3566 ins_pipe(pipe_slow); 3567 %} 3568 3569 instruct divD_mem(regD dst, memory src) %{ 3570 predicate((UseSSE>=2) && (UseAVX == 0)); 3571 match(Set dst (DivD dst (LoadD src))); 3572 3573 format %{ "divsd $dst, $src" %} 3574 ins_cost(150); 3575 ins_encode %{ 3576 __ divsd($dst$$XMMRegister, $src$$Address); 3577 %} 3578 ins_pipe(pipe_slow); 3579 %} 3580 3581 instruct divD_imm(regD dst, immD con) %{ 3582 predicate((UseSSE>=2) && (UseAVX == 0)); 3583 match(Set dst (DivD dst con)); 3584 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3585 ins_cost(150); 3586 ins_encode %{ 3587 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3588 %} 3589 ins_pipe(pipe_slow); 3590 %} 3591 3592 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3593 predicate(UseAVX > 0); 3594 match(Set dst (DivD src1 src2)); 3595 3596 format %{ "vdivsd $dst, $src1, $src2" %} 3597 ins_cost(150); 3598 ins_encode %{ 3599 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3600 %} 3601 ins_pipe(pipe_slow); 3602 %} 3603 3604 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3605 predicate(UseAVX > 0); 3606 match(Set dst (DivD src1 (LoadD src2))); 3607 3608 format %{ "vdivsd $dst, $src1, $src2" %} 3609 ins_cost(150); 3610 ins_encode %{ 3611 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3612 %} 3613 ins_pipe(pipe_slow); 3614 %} 3615 3616 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3617 predicate(UseAVX > 0); 3618 match(Set dst (DivD src con)); 3619 3620 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3621 ins_cost(150); 3622 ins_encode %{ 3623 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3624 %} 3625 ins_pipe(pipe_slow); 3626 %} 3627 3628 instruct absF_reg(regF dst) %{ 3629 predicate((UseSSE>=1) && (UseAVX == 0)); 3630 match(Set dst (AbsF dst)); 3631 ins_cost(150); 3632 
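  // The andps with the sign mask below clears only the IEEE-754 sign bit, so it
  // yields |x| without branching and without touching a NaN payload. A minimal
  // C++ sketch of the same bit trick (illustrative only; abs_via_mask is a
  // made-up name, not part of this file; needs <cstring>/<cstdint>):
  //   static float abs_via_mask(float f) {
  //     uint32_t bits; memcpy(&bits, &f, sizeof(bits));
  //     bits &= 0x7fffffffu; memcpy(&f, &bits, sizeof(f));
  //     return f;
  //   }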
format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3633 ins_encode %{ 3634 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3635 %} 3636 ins_pipe(pipe_slow); 3637 %} 3638 3639 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3640 predicate(UseAVX > 0); 3641 match(Set dst (AbsF src)); 3642 ins_cost(150); 3643 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3644 ins_encode %{ 3645 int vlen_enc = Assembler::AVX_128bit; 3646 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3647 ExternalAddress(float_signmask()), vlen_enc); 3648 %} 3649 ins_pipe(pipe_slow); 3650 %} 3651 3652 instruct absD_reg(regD dst) %{ 3653 predicate((UseSSE>=2) && (UseAVX == 0)); 3654 match(Set dst (AbsD dst)); 3655 ins_cost(150); 3656 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3657 "# abs double by sign masking" %} 3658 ins_encode %{ 3659 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3660 %} 3661 ins_pipe(pipe_slow); 3662 %} 3663 3664 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3665 predicate(UseAVX > 0); 3666 match(Set dst (AbsD src)); 3667 ins_cost(150); 3668 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3669 "# abs double by sign masking" %} 3670 ins_encode %{ 3671 int vlen_enc = Assembler::AVX_128bit; 3672 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3673 ExternalAddress(double_signmask()), vlen_enc); 3674 %} 3675 ins_pipe(pipe_slow); 3676 %} 3677 3678 instruct negF_reg(regF dst) %{ 3679 predicate((UseSSE>=1) && (UseAVX == 0)); 3680 match(Set dst (NegF dst)); 3681 ins_cost(150); 3682 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3683 ins_encode %{ 3684 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3685 %} 3686 ins_pipe(pipe_slow); 3687 %} 3688 3689 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3690 predicate(UseAVX > 0); 3691 match(Set dst (NegF src)); 3692 ins_cost(150); 3693 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3694 ins_encode %{ 3695 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3696 ExternalAddress(float_signflip())); 3697 %} 3698 ins_pipe(pipe_slow); 3699 %} 3700 3701 instruct negD_reg(regD dst) %{ 3702 predicate((UseSSE>=2) && (UseAVX == 0)); 3703 match(Set dst (NegD dst)); 3704 ins_cost(150); 3705 format %{ "xorpd $dst, [0x8000000000000000]\t" 3706 "# neg double by sign flipping" %} 3707 ins_encode %{ 3708 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3709 %} 3710 ins_pipe(pipe_slow); 3711 %} 3712 3713 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3714 predicate(UseAVX > 0); 3715 match(Set dst (NegD src)); 3716 ins_cost(150); 3717 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3718 "# neg double by sign flipping" %} 3719 ins_encode %{ 3720 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3721 ExternalAddress(double_signflip())); 3722 %} 3723 ins_pipe(pipe_slow); 3724 %} 3725 3726 // sqrtss instruction needs destination register to be pre initialized for best performance 3727 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3728 instruct sqrtF_reg(regF dst) %{ 3729 predicate(UseSSE>=1); 3730 match(Set dst (SqrtF dst)); 3731 format %{ "sqrtss $dst, $dst" %} 3732 ins_encode %{ 3733 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3734 %} 3735 ins_pipe(pipe_slow); 3736 %} 3737 3738 // sqrtsd instruction needs destination register to be pre initialized for best performance 3739 // Therefore only the instruct rule where the input is pre-loaded 
into dst register is defined below 3740 instruct sqrtD_reg(regD dst) %{ 3741 predicate(UseSSE>=2); 3742 match(Set dst (SqrtD dst)); 3743 format %{ "sqrtsd $dst, $dst" %} 3744 ins_encode %{ 3745 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3746 %} 3747 ins_pipe(pipe_slow); 3748 %} 3749 3750 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3751 effect(TEMP tmp); 3752 match(Set dst (ConvF2HF src)); 3753 ins_cost(125); 3754 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3755 ins_encode %{ 3756 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3762 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3763 effect(TEMP ktmp, TEMP rtmp); 3764 match(Set mem (StoreC mem (ConvF2HF src))); 3765 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3766 ins_encode %{ 3767 __ movl($rtmp$$Register, 0x1); 3768 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3769 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3770 %} 3771 ins_pipe( pipe_slow ); 3772 %} 3773 3774 instruct vconvF2HF(vec dst, vec src) %{ 3775 match(Set dst (VectorCastF2HF src)); 3776 format %{ "vector_conv_F2HF $dst $src" %} 3777 ins_encode %{ 3778 int vlen_enc = vector_length_encoding(this, $src); 3779 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3780 %} 3781 ins_pipe( pipe_slow ); 3782 %} 3783 3784 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3785 predicate(n->as_StoreVector()->memory_size() >= 16); 3786 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3787 format %{ "vcvtps2ph $mem,$src" %} 3788 ins_encode %{ 3789 int vlen_enc = vector_length_encoding(this, $src); 3790 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3791 %} 3792 ins_pipe( pipe_slow ); 3793 %} 3794 3795 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3796 match(Set dst (ConvHF2F src)); 3797 format %{ "vcvtph2ps $dst,$src" %} 3798 ins_encode %{ 3799 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3800 %} 3801 ins_pipe( pipe_slow ); 3802 %} 3803 3804 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3805 match(Set dst (VectorCastHF2F (LoadVector mem))); 3806 format %{ "vcvtph2ps $dst,$mem" %} 3807 ins_encode %{ 3808 int vlen_enc = vector_length_encoding(this); 3809 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3810 %} 3811 ins_pipe( pipe_slow ); 3812 %} 3813 3814 instruct vconvHF2F(vec dst, vec src) %{ 3815 match(Set dst (VectorCastHF2F src)); 3816 ins_cost(125); 3817 format %{ "vector_conv_HF2F $dst,$src" %} 3818 ins_encode %{ 3819 int vlen_enc = vector_length_encoding(this); 3820 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 // ---------------------------------------- VectorReinterpret ------------------------------------ 3826 instruct reinterpret_mask(kReg dst) %{ 3827 predicate(n->bottom_type()->isa_vectmask() && 3828 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3829 match(Set dst (VectorReinterpret dst)); 3830 ins_cost(125); 3831 format %{ "vector_reinterpret $dst\t!" 
%} 3832 ins_encode %{ 3833 // empty 3834 %} 3835 ins_pipe( pipe_slow ); 3836 %} 3837 3838 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3839 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3840 n->bottom_type()->isa_vectmask() && 3841 n->in(1)->bottom_type()->isa_vectmask() && 3842 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3843 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3844 match(Set dst (VectorReinterpret src)); 3845 effect(TEMP xtmp); 3846 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3847 ins_encode %{ 3848 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3849 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3850 assert(src_sz == dst_sz , "src and dst size mismatch"); 3851 int vlen_enc = vector_length_encoding(src_sz); 3852 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3853 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3854 %} 3855 ins_pipe( pipe_slow ); 3856 %} 3857 3858 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3859 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3860 n->bottom_type()->isa_vectmask() && 3861 n->in(1)->bottom_type()->isa_vectmask() && 3862 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3863 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3864 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3865 match(Set dst (VectorReinterpret src)); 3866 effect(TEMP xtmp); 3867 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3868 ins_encode %{ 3869 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3870 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3871 assert(src_sz == dst_sz , "src and dst size mismatch"); 3872 int vlen_enc = vector_length_encoding(src_sz); 3873 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3874 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3875 %} 3876 ins_pipe( pipe_slow ); 3877 %} 3878 3879 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3880 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3881 n->bottom_type()->isa_vectmask() && 3882 n->in(1)->bottom_type()->isa_vectmask() && 3883 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3884 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3885 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3886 match(Set dst (VectorReinterpret src)); 3887 effect(TEMP xtmp); 3888 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3889 ins_encode %{ 3890 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3891 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3892 assert(src_sz == dst_sz , "src and dst size mismatch"); 3893 int vlen_enc = vector_length_encoding(src_sz); 3894 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3895 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3896 %} 3897 ins_pipe( pipe_slow ); 3898 %} 3899 3900 instruct reinterpret(vec dst) %{ 3901 predicate(!n->bottom_type()->isa_vectmask() && 3902 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3903 match(Set dst (VectorReinterpret dst)); 3904 ins_cost(125); 3905 format %{ "vector_reinterpret $dst\t!" %} 3906 ins_encode %{ 3907 // empty 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 instruct reinterpret_expand(vec dst, vec src) %{ 3913 predicate(UseAVX == 0 && 3914 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3915 match(Set dst (VectorReinterpret src)); 3916 ins_cost(125); 3917 effect(TEMP dst); 3918 format %{ "vector_reinterpret_expand $dst,$src" %} 3919 ins_encode %{ 3920 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3921 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3922 3923 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3924 if (src_vlen_in_bytes == 4) { 3925 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3926 } else { 3927 assert(src_vlen_in_bytes == 8, ""); 3928 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3929 } 3930 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3931 %} 3932 ins_pipe( pipe_slow ); 3933 %} 3934 3935 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3936 predicate(UseAVX > 0 && 3937 !n->bottom_type()->isa_vectmask() && 3938 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3939 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3940 match(Set dst (VectorReinterpret src)); 3941 ins_cost(125); 3942 format %{ "vector_reinterpret_expand $dst,$src" %} 3943 ins_encode %{ 3944 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 3950 instruct vreinterpret_expand(legVec dst, vec src) %{ 3951 predicate(UseAVX > 0 && 3952 !n->bottom_type()->isa_vectmask() && 3953 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3954 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3955 match(Set dst (VectorReinterpret src)); 3956 ins_cost(125); 3957 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3958 ins_encode %{ 3959 switch (Matcher::vector_length_in_bytes(this, $src)) { 3960 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3961 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3962 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3963 default: ShouldNotReachHere(); 3964 } 3965 %} 3966 ins_pipe( pipe_slow ); 3967 %} 3968 3969 instruct reinterpret_shrink(vec dst, legVec src) %{ 3970 predicate(!n->bottom_type()->isa_vectmask() && 3971 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3972 match(Set dst (VectorReinterpret src)); 3973 ins_cost(125); 3974 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3975 ins_encode %{ 3976 switch (Matcher::vector_length_in_bytes(this)) { 3977 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3978 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3979 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3980 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3981 default: ShouldNotReachHere(); 3982 } 3983 %} 3984 ins_pipe( pipe_slow ); 3985 %} 3986 3987 // ---------------------------------------------------------------------------------------------------- 3988 3989 #ifdef _LP64 3990 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3991 match(Set dst (RoundDoubleMode src rmode)); 3992 format %{ "roundsd $dst,$src" %} 3993 ins_cost(150); 3994 ins_encode %{ 3995 assert(UseSSE >= 4, "required"); 3996 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3997 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3998 } 3999 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 4000 %} 4001 ins_pipe(pipe_slow); 4002 %} 4003 4004 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 4005 match(Set dst (RoundDoubleMode con rmode)); 4006 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 4007 ins_cost(150); 4008 ins_encode %{ 4009 assert(UseSSE >= 4, "required"); 4010 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 4011 %} 4012 ins_pipe(pipe_slow); 4013 %} 4014 4015 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 4016 predicate(Matcher::vector_length(n) < 8); 4017 match(Set dst (RoundDoubleModeV src rmode)); 4018 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 4019 ins_encode %{ 4020 assert(UseAVX > 0, "required"); 4021 int vlen_enc = vector_length_encoding(this); 4022 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 4023 %} 4024 ins_pipe( pipe_slow ); 4025 %} 4026 4027 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 4028 predicate(Matcher::vector_length(n) == 8); 4029 match(Set dst (RoundDoubleModeV src rmode)); 4030 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 4031 ins_encode %{ 4032 assert(UseAVX > 2, "required"); 4033 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 4034 %} 4035 ins_pipe( pipe_slow ); 4036 %} 4037 4038 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 4039 predicate(Matcher::vector_length(n) < 8); 4040 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4041 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 4042 ins_encode %{ 4043 assert(UseAVX > 0, "required"); 4044 int vlen_enc = vector_length_encoding(this); 4045 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 4046 %} 4047 ins_pipe( pipe_slow ); 4048 %} 4049 4050 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 4051 predicate(Matcher::vector_length(n) == 8); 4052 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4053 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 4054 ins_encode %{ 4055 assert(UseAVX > 2, "required"); 4056 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 4057 %} 4058 ins_pipe( pipe_slow ); 4059 %} 4060 #endif // _LP64 4061 4062 instruct onspinwait() %{ 4063 match(OnSpinWait); 4064 ins_cost(200); 4065 4066 format %{ 4067 $$template 4068 $$emit$$"pause\t! 
membar_onspinwait" 4069 %} 4070 ins_encode %{ 4071 __ pause(); 4072 %} 4073 ins_pipe(pipe_slow); 4074 %} 4075 4076 // a * b + c 4077 instruct fmaD_reg(regD a, regD b, regD c) %{ 4078 match(Set c (FmaD c (Binary a b))); 4079 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4080 ins_cost(150); 4081 ins_encode %{ 4082 assert(UseFMA, "Needs FMA instructions support."); 4083 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4084 %} 4085 ins_pipe( pipe_slow ); 4086 %} 4087 4088 // a * b + c 4089 instruct fmaF_reg(regF a, regF b, regF c) %{ 4090 match(Set c (FmaF c (Binary a b))); 4091 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4092 ins_cost(150); 4093 ins_encode %{ 4094 assert(UseFMA, "Needs FMA instructions support."); 4095 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4096 %} 4097 ins_pipe( pipe_slow ); 4098 %} 4099 4100 // ====================VECTOR INSTRUCTIONS===================================== 4101 4102 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4103 instruct MoveVec2Leg(legVec dst, vec src) %{ 4104 match(Set dst src); 4105 format %{ "" %} 4106 ins_encode %{ 4107 ShouldNotReachHere(); 4108 %} 4109 ins_pipe( fpu_reg_reg ); 4110 %} 4111 4112 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4113 match(Set dst src); 4114 format %{ "" %} 4115 ins_encode %{ 4116 ShouldNotReachHere(); 4117 %} 4118 ins_pipe( fpu_reg_reg ); 4119 %} 4120 4121 // ============================================================================ 4122 4123 // Load vectors generic operand pattern 4124 instruct loadV(vec dst, memory mem) %{ 4125 match(Set dst (LoadVector mem)); 4126 ins_cost(125); 4127 format %{ "load_vector $dst,$mem" %} 4128 ins_encode %{ 4129 BasicType bt = Matcher::vector_element_basic_type(this); 4130 __ load_vector(bt, $dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4131 %} 4132 ins_pipe( pipe_slow ); 4133 %} 4134 4135 // Store vectors generic operand pattern. 4136 instruct storeV(memory mem, vec src) %{ 4137 match(Set mem (StoreVector mem src)); 4138 ins_cost(145); 4139 format %{ "store_vector $mem,$src\n\t" %} 4140 ins_encode %{ 4141 switch (Matcher::vector_length_in_bytes(this, $src)) { 4142 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4143 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4144 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4145 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4146 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4147 default: ShouldNotReachHere(); 4148 } 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 // ---------------------------------------- Gather ------------------------------------ 4154 4155 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4156 4157 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4158 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4159 Matcher::vector_length_in_bytes(n) <= 32); 4160 match(Set dst (LoadVectorGather mem idx)); 4161 effect(TEMP dst, TEMP tmp, TEMP mask); 4162 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4163 ins_encode %{ 4164 int vlen_enc = vector_length_encoding(this); 4165 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4166 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4167 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4168 __ lea($tmp$$Register, $mem$$Address); 4169 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4170 %} 4171 ins_pipe( pipe_slow ); 4172 %} 4173 4174 4175 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4176 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4177 !is_subword_type(Matcher::vector_element_basic_type(n))); 4178 match(Set dst (LoadVectorGather mem idx)); 4179 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4180 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4181 ins_encode %{ 4182 int vlen_enc = vector_length_encoding(this); 4183 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4184 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4185 __ lea($tmp$$Register, $mem$$Address); 4186 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4187 %} 4188 ins_pipe( pipe_slow ); 4189 %} 4190 4191 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4192 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4193 !is_subword_type(Matcher::vector_element_basic_type(n))); 4194 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4195 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4196 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4197 ins_encode %{ 4198 assert(UseAVX > 2, "sanity"); 4199 int vlen_enc = vector_length_encoding(this); 4200 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4201 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4202 // Note: Since gather instruction partially updates the opmask register used 4203 // for predication hense moving mask operand to a temporary. 4204 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4205 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4206 __ lea($tmp$$Register, $mem$$Address); 4207 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4208 %} 4209 ins_pipe( pipe_slow ); 4210 %} 4211 4212 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4213 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4214 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4215 effect(TEMP tmp, TEMP rtmp); 4216 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t! 
using $tmp and $rtmp as TEMP" %} 4217 ins_encode %{ 4218 int vlen_enc = vector_length_encoding(this); 4219 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4220 __ lea($tmp$$Register, $mem$$Address); 4221 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4222 %} 4223 ins_pipe( pipe_slow ); 4224 %} 4225 4226 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4227 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4228 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4229 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4230 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4231 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4232 ins_encode %{ 4233 int vlen_enc = vector_length_encoding(this); 4234 int vector_len = Matcher::vector_length(this); 4235 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4236 __ lea($tmp$$Register, $mem$$Address); 4237 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4238 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4239 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4240 %} 4241 ins_pipe( pipe_slow ); 4242 %} 4243 4244 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4245 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4246 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4247 effect(TEMP tmp, TEMP rtmp, KILL cr); 4248 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4249 ins_encode %{ 4250 int vlen_enc = vector_length_encoding(this); 4251 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4252 __ lea($tmp$$Register, $mem$$Address); 4253 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4254 %} 4255 ins_pipe( pipe_slow ); 4256 %} 4257 4258 4259 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4260 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4261 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4262 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4263 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4264 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4265 ins_encode %{ 4266 int vlen_enc = vector_length_encoding(this); 4267 int vector_len = Matcher::vector_length(this); 4268 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4269 __ lea($tmp$$Register, $mem$$Address); 4270 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4271 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4272 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4273 %} 4274 ins_pipe( pipe_slow ); 4275 %} 4276 4277 4278 #ifdef _LP64 4279 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4280 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4281 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4282 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4283 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4284 ins_encode %{ 4285 int vlen_enc = vector_length_encoding(this); 4286 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4287 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4288 __ lea($tmp$$Register, $mem$$Address); 4289 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4290 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4291 %} 4292 ins_pipe( pipe_slow ); 4293 %} 4294 4295 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4296 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4297 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4298 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4299 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4300 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4301 ins_encode %{ 4302 int vlen_enc = vector_length_encoding(this); 4303 int vector_len = Matcher::vector_length(this); 4304 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4305 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4306 __ lea($tmp$$Register, $mem$$Address); 4307 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4308 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4309 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4310 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4316 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4317 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4318 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4319 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4320 ins_encode %{ 4321 int vlen_enc = vector_length_encoding(this); 4322 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4323 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4324 __ lea($tmp$$Register, $mem$$Address); 4325 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4326 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4327 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4328 %} 4329 ins_pipe( pipe_slow ); 4330 %} 4331 4332 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4333 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4334 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4335 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4336 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4337 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4338 ins_encode %{ 4339 int vlen_enc = vector_length_encoding(this); 4340 int vector_len = Matcher::vector_length(this); 4341 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4342 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4343 __ lea($tmp$$Register, $mem$$Address); 4344 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4345 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4346 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4347 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4348 %} 4349 ins_pipe( pipe_slow ); 4350 %} 4351 4352 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4353 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4354 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4355 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4356 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4357 ins_encode %{ 4358 int vlen_enc = vector_length_encoding(this); 4359 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4360 __ lea($tmp$$Register, $mem$$Address); 4361 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4362 if (elem_bt == T_SHORT) { 4363 __ movl($mask_idx$$Register, 0x55555555); 4364 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4365 } 4366 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4367 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4368 %} 4369 ins_pipe( pipe_slow ); 4370 %} 4371 4372 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4373 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4374 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4375 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4376 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4377 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4378 ins_encode %{ 4379 int vlen_enc = vector_length_encoding(this); 4380 int vector_len = Matcher::vector_length(this); 4381 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4382 __ lea($tmp$$Register, $mem$$Address); 4383 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4384 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4385 if (elem_bt == T_SHORT) { 4386 __ movl($mask_idx$$Register, 0x55555555); 4387 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4388 } 4389 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4390 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4391 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4392 %} 4393 ins_pipe( pipe_slow ); 4394 %} 4395 4396 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4397 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4398 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4399 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4400 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4401 ins_encode %{ 4402 int vlen_enc = vector_length_encoding(this); 4403 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4404 __ lea($tmp$$Register, $mem$$Address); 4405 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4406 if (elem_bt == T_SHORT) { 4407 __ movl($mask_idx$$Register, 0x55555555); 4408 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4409 } 4410 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4411 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4412 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4413 %} 4414 ins_pipe( pipe_slow ); 4415 %} 4416 4417 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4418 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4419 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4420 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4421 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4422 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is first copied to a temporary mask register.
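    // Illustrative sketch (not the emitted code) of the AVX-512 scatter's opmask
    // behaviour, which is the reason for the copy below: the opmask acts as a
    // completion mask, each bit is cleared as its element is stored, and the whole
    // register reads as zero once the instruction retires.
    //   for (int i = 0; i < lanes; i++) {
    //     if (k[i]) { mem[base + idx[i] * scale] = src[i]; k[i] = 0; }
    //   }
    // Copying $mask into the $ktmp TEMP therefore leaves the matched mask operand
    // unchanged for any later uses of that value.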
4477 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4478 __ lea($tmp$$Register, $mem$$Address); 4479 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4480 %} 4481 ins_pipe( pipe_slow ); 4482 %} 4483 4484 // ====================REPLICATE======================================= 4485 4486 // Replicate byte scalar to be vector 4487 instruct vReplB_reg(vec dst, rRegI src) %{ 4488 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4489 match(Set dst (Replicate src)); 4490 format %{ "replicateB $dst,$src" %} 4491 ins_encode %{ 4492 uint vlen = Matcher::vector_length(this); 4493 if (UseAVX >= 2) { 4494 int vlen_enc = vector_length_encoding(this); 4495 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4496 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4497 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4498 } else { 4499 __ movdl($dst$$XMMRegister, $src$$Register); 4500 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4501 } 4502 } else { 4503 assert(UseAVX < 2, ""); 4504 __ movdl($dst$$XMMRegister, $src$$Register); 4505 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4506 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4507 if (vlen >= 16) { 4508 assert(vlen == 16, ""); 4509 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4510 } 4511 } 4512 %} 4513 ins_pipe( pipe_slow ); 4514 %} 4515 4516 instruct ReplB_mem(vec dst, memory mem) %{ 4517 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4518 match(Set dst (Replicate (LoadB mem))); 4519 format %{ "replicateB $dst,$mem" %} 4520 ins_encode %{ 4521 int vlen_enc = vector_length_encoding(this); 4522 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4523 %} 4524 ins_pipe( pipe_slow ); 4525 %} 4526 4527 // ====================ReplicateS======================================= 4528 4529 instruct vReplS_reg(vec dst, rRegI src) %{ 4530 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4531 match(Set dst (Replicate src)); 4532 format %{ "replicateS $dst,$src" %} 4533 ins_encode %{ 4534 uint vlen = Matcher::vector_length(this); 4535 int vlen_enc = vector_length_encoding(this); 4536 if (UseAVX >= 2) { 4537 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4538 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4539 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4540 } else { 4541 __ movdl($dst$$XMMRegister, $src$$Register); 4542 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4543 } 4544 } else { 4545 assert(UseAVX < 2, ""); 4546 __ movdl($dst$$XMMRegister, $src$$Register); 4547 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4548 if (vlen >= 8) { 4549 assert(vlen == 8, ""); 4550 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4551 } 4552 } 4553 %} 4554 ins_pipe( pipe_slow ); 4555 %} 4556 4557 instruct ReplS_mem(vec dst, memory mem) %{ 4558 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4559 match(Set dst (Replicate (LoadS mem))); 4560 format %{ "replicateS $dst,$mem" %} 4561 ins_encode %{ 4562 int vlen_enc = vector_length_encoding(this); 4563 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4564 %} 4565 ins_pipe( pipe_slow ); 4566 %} 4567 4568 // ====================ReplicateI======================================= 4569 4570 instruct ReplI_reg(vec dst, rRegI 
src) %{ 4571 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4572 match(Set dst (Replicate src)); 4573 format %{ "replicateI $dst,$src" %} 4574 ins_encode %{ 4575 uint vlen = Matcher::vector_length(this); 4576 int vlen_enc = vector_length_encoding(this); 4577 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4578 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4579 } else if (VM_Version::supports_avx2()) { 4580 __ movdl($dst$$XMMRegister, $src$$Register); 4581 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4582 } else { 4583 __ movdl($dst$$XMMRegister, $src$$Register); 4584 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4585 } 4586 %} 4587 ins_pipe( pipe_slow ); 4588 %} 4589 4590 instruct ReplI_mem(vec dst, memory mem) %{ 4591 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4592 match(Set dst (Replicate (LoadI mem))); 4593 format %{ "replicateI $dst,$mem" %} 4594 ins_encode %{ 4595 int vlen_enc = vector_length_encoding(this); 4596 if (VM_Version::supports_avx2()) { 4597 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4598 } else if (VM_Version::supports_avx()) { 4599 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4600 } else { 4601 __ movdl($dst$$XMMRegister, $mem$$Address); 4602 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4603 } 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 4608 instruct ReplI_imm(vec dst, immI con) %{ 4609 predicate(Matcher::is_non_long_integral_vector(n)); 4610 match(Set dst (Replicate con)); 4611 format %{ "replicateI $dst,$con" %} 4612 ins_encode %{ 4613 InternalAddress addr = $constantaddress(vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4614 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 16) / 4615 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4616 BasicType bt = Matcher::vector_element_basic_type(this); 4617 int vlen = Matcher::vector_length_in_bytes(this); 4618 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4619 %} 4620 ins_pipe( pipe_slow ); 4621 %} 4622 4623 // Replicate scalar zero to be vector 4624 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4625 predicate(Matcher::is_non_long_integral_vector(n)); 4626 match(Set dst (Replicate zero)); 4627 format %{ "replicateI $dst,$zero" %} 4628 ins_encode %{ 4629 int vlen_enc = vector_length_encoding(this); 4630 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4631 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4632 } else { 4633 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4634 } 4635 %} 4636 ins_pipe( fpu_reg_reg ); 4637 %} 4638 4639 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4640 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4641 match(Set dst (Replicate con)); 4642 format %{ "vallones $dst" %} 4643 ins_encode %{ 4644 int vector_len = vector_length_encoding(this); 4645 __ vallones($dst$$XMMRegister, vector_len); 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 4650 // ====================ReplicateL======================================= 4651 4652 #ifdef _LP64 4653 // Replicate long (8 byte) scalar to be vector 4654 instruct ReplL_reg(vec dst, rRegL src) %{ 4655 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4656 match(Set dst (Replicate src)); 4657 format %{ "replicateL $dst,$src" %} 4658 ins_encode %{ 4659 int vlen = Matcher::vector_length(this); 4660 int vlen_enc = vector_length_encoding(this); 4661 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4662 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4663 } else if (VM_Version::supports_avx2()) { 4664 __ movdq($dst$$XMMRegister, $src$$Register); 4665 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4666 } else { 4667 __ movdq($dst$$XMMRegister, $src$$Register); 4668 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4669 } 4670 %} 4671 ins_pipe( pipe_slow ); 4672 %} 4673 #else // _LP64 4674 // Replicate long (8 byte) scalar to be vector 4675 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4676 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4677 match(Set dst (Replicate src)); 4678 effect(TEMP dst, USE src, TEMP tmp); 4679 format %{ "replicateL $dst,$src" %} 4680 ins_encode %{ 4681 uint vlen = Matcher::vector_length(this); 4682 if (vlen == 2) { 4683 __ movdl($dst$$XMMRegister, $src$$Register); 4684 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4685 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4686 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4687 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4688 int vlen_enc = Assembler::AVX_256bit; 4689 __ movdl($dst$$XMMRegister, $src$$Register); 4690 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4691 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4692 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4693 } else { 4694 __ movdl($dst$$XMMRegister, $src$$Register); 4695 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4696 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4697 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4698 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 
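      // Fallback note: the 64-bit value was assembled above from its two 32-bit
      // halves (movdl of the low half, movdl of HIGH_FROM_LOW into $tmp, then
      // punpckldq), duplicated within the low 128-bit lane by punpcklqdq, and
      // finally copied into the upper 128-bit lane of the 256-bit destination.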
4699 } 4700 %} 4701 ins_pipe( pipe_slow ); 4702 %} 4703 4704 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4705 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4706 match(Set dst (Replicate src)); 4707 effect(TEMP dst, USE src, TEMP tmp); 4708 format %{ "replicateL $dst,$src" %} 4709 ins_encode %{ 4710 if (VM_Version::supports_avx512vl()) { 4711 __ movdl($dst$$XMMRegister, $src$$Register); 4712 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4713 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4714 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4715 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4716 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4717 } else { 4718 int vlen_enc = Assembler::AVX_512bit; 4719 __ movdl($dst$$XMMRegister, $src$$Register); 4720 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4721 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4722 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4723 } 4724 %} 4725 ins_pipe( pipe_slow ); 4726 %} 4727 #endif // _LP64 4728 4729 instruct ReplL_mem(vec dst, memory mem) %{ 4730 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4731 match(Set dst (Replicate (LoadL mem))); 4732 format %{ "replicateL $dst,$mem" %} 4733 ins_encode %{ 4734 int vlen_enc = vector_length_encoding(this); 4735 if (VM_Version::supports_avx2()) { 4736 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4737 } else if (VM_Version::supports_sse3()) { 4738 __ movddup($dst$$XMMRegister, $mem$$Address); 4739 } else { 4740 __ movq($dst$$XMMRegister, $mem$$Address); 4741 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4742 } 4743 %} 4744 ins_pipe( pipe_slow ); 4745 %} 4746 4747 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4748 instruct ReplL_imm(vec dst, immL con) %{ 4749 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4750 match(Set dst (Replicate con)); 4751 format %{ "replicateL $dst,$con" %} 4752 ins_encode %{ 4753 InternalAddress addr = $constantaddress(vreplicate_imm(T_LONG, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4754 int vlen = Matcher::vector_length_in_bytes(this); 4755 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4756 %} 4757 ins_pipe( pipe_slow ); 4758 %} 4759 4760 instruct ReplL_zero(vec dst, immL0 zero) %{ 4761 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4762 match(Set dst (Replicate zero)); 4763 format %{ "replicateL $dst,$zero" %} 4764 ins_encode %{ 4765 int vlen_enc = vector_length_encoding(this); 4766 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4767 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4768 } else { 4769 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4770 } 4771 %} 4772 ins_pipe( fpu_reg_reg ); 4773 %} 4774 4775 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4776 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4777 match(Set dst (Replicate con)); 4778 format %{ "vallones $dst" %} 4779 ins_encode %{ 4780 int vector_len = vector_length_encoding(this); 4781 __ vallones($dst$$XMMRegister, vector_len); 4782 %} 4783 ins_pipe( pipe_slow ); 4784 %} 4785 4786 // ====================ReplicateF======================================= 4787 4788 instruct vReplF_reg(vec dst, vlRegF src) %{ 4789 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4790 match(Set dst (Replicate src)); 4791 format %{ "replicateF $dst,$src" %} 4792 ins_encode %{ 4793 uint vlen = Matcher::vector_length(this); 4794 int vlen_enc = vector_length_encoding(this); 4795 if (vlen <= 4) { 4796 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4797 } else if (VM_Version::supports_avx2()) { 4798 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4799 } else { 4800 assert(vlen == 8, "sanity"); 4801 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4802 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4803 } 4804 %} 4805 ins_pipe( pipe_slow ); 4806 %} 4807 4808 instruct ReplF_reg(vec dst, vlRegF src) %{ 4809 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4810 match(Set dst (Replicate src)); 4811 format %{ "replicateF $dst,$src" %} 4812 ins_encode %{ 4813 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4814 %} 4815 ins_pipe( pipe_slow ); 4816 %} 4817 4818 instruct ReplF_mem(vec dst, memory mem) %{ 4819 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4820 match(Set dst (Replicate (LoadF mem))); 4821 format %{ "replicateF $dst,$mem" %} 4822 ins_encode %{ 4823 int vlen_enc = vector_length_encoding(this); 4824 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4825 %} 4826 ins_pipe( pipe_slow ); 4827 %} 4828 4829 // Replicate float scalar immediate to be vector by loading from const table. 4830 instruct ReplF_imm(vec dst, immF con) %{ 4831 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4832 match(Set dst (Replicate con)); 4833 format %{ "replicateF $dst,$con" %} 4834 ins_encode %{ 4835 InternalAddress addr = $constantaddress(vreplicate_imm(T_FLOAT, $con$$constant, 4836 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 4)); 4837 int vlen = Matcher::vector_length_in_bytes(this); 4838 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4839 %} 4840 ins_pipe( pipe_slow ); 4841 %} 4842 4843 instruct ReplF_zero(vec dst, immF0 zero) %{ 4844 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4845 match(Set dst (Replicate zero)); 4846 format %{ "replicateF $dst,$zero" %} 4847 ins_encode %{ 4848 int vlen_enc = vector_length_encoding(this); 4849 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4850 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4851 } else { 4852 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4853 } 4854 %} 4855 ins_pipe( fpu_reg_reg ); 4856 %} 4857 4858 // ====================ReplicateD======================================= 4859 4860 // Replicate double (8 bytes) scalar to be vector 4861 instruct vReplD_reg(vec dst, vlRegD src) %{ 4862 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4863 match(Set dst (Replicate src)); 4864 format %{ "replicateD $dst,$src" %} 4865 ins_encode %{ 4866 uint vlen = Matcher::vector_length(this); 4867 int vlen_enc = vector_length_encoding(this); 4868 if (vlen <= 2) { 4869 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4870 } else if (VM_Version::supports_avx2()) { 4871 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4872 } else { 4873 assert(vlen == 4, "sanity"); 4874 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4875 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4876 } 4877 %} 4878 ins_pipe( pipe_slow ); 4879 %} 4880 4881 instruct ReplD_reg(vec dst, vlRegD src) %{ 4882 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4883 match(Set dst (Replicate src)); 4884 format %{ "replicateD $dst,$src" %} 4885 ins_encode %{ 4886 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4887 %} 4888 ins_pipe( pipe_slow ); 4889 %} 4890 4891 instruct ReplD_mem(vec dst, memory mem) %{ 4892 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4893 match(Set dst (Replicate (LoadD mem))); 4894 format %{ "replicateD $dst,$mem" %} 4895 ins_encode %{ 4896 if (Matcher::vector_length(this) >= 4) { 4897 int vlen_enc = vector_length_encoding(this); 4898 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4899 } else { 4900 __ movddup($dst$$XMMRegister, $mem$$Address); 4901 } 4902 %} 4903 ins_pipe( pipe_slow ); 4904 %} 4905 4906 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4907 instruct ReplD_imm(vec dst, immD con) %{ 4908 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4909 match(Set dst (Replicate con)); 4910 format %{ "replicateD $dst,$con" %} 4911 ins_encode %{ 4912 InternalAddress addr = $constantaddress(vreplicate_imm(T_DOUBLE, $con$$constant, VM_Version::supports_sse3() ? 
1 : 2)); 4913 int vlen = Matcher::vector_length_in_bytes(this); 4914 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4915 %} 4916 ins_pipe( pipe_slow ); 4917 %} 4918 4919 instruct ReplD_zero(vec dst, immD0 zero) %{ 4920 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4921 match(Set dst (Replicate zero)); 4922 format %{ "replicateD $dst,$zero" %} 4923 ins_encode %{ 4924 int vlen_enc = vector_length_encoding(this); 4925 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4926 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4927 } else { 4928 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4929 } 4930 %} 4931 ins_pipe( fpu_reg_reg ); 4932 %} 4933 4934 // ====================VECTOR INSERT======================================= 4935 4936 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4937 predicate(Matcher::vector_length_in_bytes(n) < 32); 4938 match(Set dst (VectorInsert (Binary dst val) idx)); 4939 format %{ "vector_insert $dst,$val,$idx" %} 4940 ins_encode %{ 4941 assert(UseSSE >= 4, "required"); 4942 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4943 4944 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4945 4946 assert(is_integral_type(elem_bt), ""); 4947 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4948 4949 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4950 %} 4951 ins_pipe( pipe_slow ); 4952 %} 4953 4954 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4955 predicate(Matcher::vector_length_in_bytes(n) == 32); 4956 match(Set dst (VectorInsert (Binary src val) idx)); 4957 effect(TEMP vtmp); 4958 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4959 ins_encode %{ 4960 int vlen_enc = Assembler::AVX_256bit; 4961 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4962 int elem_per_lane = 16/type2aelembytes(elem_bt); 4963 int log2epr = log2(elem_per_lane); 4964 4965 assert(is_integral_type(elem_bt), "sanity"); 4966 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4967 4968 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4969 uint y_idx = ($idx$$constant >> log2epr) & 1; 4970 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4971 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4972 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4973 %} 4974 ins_pipe( pipe_slow ); 4975 %} 4976 4977 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4978 predicate(Matcher::vector_length_in_bytes(n) == 64); 4979 match(Set dst (VectorInsert (Binary src val) idx)); 4980 effect(TEMP vtmp); 4981 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4982 ins_encode %{ 4983 assert(UseAVX > 2, "sanity"); 4984 4985 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4986 int elem_per_lane = 16/type2aelembytes(elem_bt); 4987 int log2epr = log2(elem_per_lane); 4988 4989 assert(is_integral_type(elem_bt), ""); 4990 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4991 4992 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4993 uint y_idx = ($idx$$constant >> log2epr) & 3; 4994 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4995 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4996 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, 
y_idx); 4997 %} 4998 ins_pipe( pipe_slow ); 4999 %} 5000 5001 #ifdef _LP64 5002 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 5003 predicate(Matcher::vector_length(n) == 2); 5004 match(Set dst (VectorInsert (Binary dst val) idx)); 5005 format %{ "vector_insert $dst,$val,$idx" %} 5006 ins_encode %{ 5007 assert(UseSSE >= 4, "required"); 5008 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 5009 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5010 5011 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 5012 %} 5013 ins_pipe( pipe_slow ); 5014 %} 5015 5016 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 5017 predicate(Matcher::vector_length(n) == 4); 5018 match(Set dst (VectorInsert (Binary src val) idx)); 5019 effect(TEMP vtmp); 5020 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5021 ins_encode %{ 5022 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 5023 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5024 5025 uint x_idx = $idx$$constant & right_n_bits(1); 5026 uint y_idx = ($idx$$constant >> 1) & 1; 5027 int vlen_enc = Assembler::AVX_256bit; 5028 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5029 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5030 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5031 %} 5032 ins_pipe( pipe_slow ); 5033 %} 5034 5035 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 5036 predicate(Matcher::vector_length(n) == 8); 5037 match(Set dst (VectorInsert (Binary src val) idx)); 5038 effect(TEMP vtmp); 5039 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5040 ins_encode %{ 5041 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 5042 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5043 5044 uint x_idx = $idx$$constant & right_n_bits(1); 5045 uint y_idx = ($idx$$constant >> 1) & 3; 5046 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5047 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 5048 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5049 %} 5050 ins_pipe( pipe_slow ); 5051 %} 5052 #endif 5053 5054 instruct insertF(vec dst, regF val, immU8 idx) %{ 5055 predicate(Matcher::vector_length(n) < 8); 5056 match(Set dst (VectorInsert (Binary dst val) idx)); 5057 format %{ "vector_insert $dst,$val,$idx" %} 5058 ins_encode %{ 5059 assert(UseSSE >= 4, "sanity"); 5060 5061 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5062 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5063 5064 uint x_idx = $idx$$constant & right_n_bits(2); 5065 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5066 %} 5067 ins_pipe( pipe_slow ); 5068 %} 5069 5070 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 5071 predicate(Matcher::vector_length(n) >= 8); 5072 match(Set dst (VectorInsert (Binary src val) idx)); 5073 effect(TEMP vtmp); 5074 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5075 ins_encode %{ 5076 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 5077 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5078 5079 int vlen = Matcher::vector_length(this); 5080 uint x_idx = $idx$$constant & right_n_bits(2); 5081 if (vlen == 8) { 5082 uint y_idx = 
($idx$$constant >> 2) & 1; 5083 int vlen_enc = Assembler::AVX_256bit; 5084 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5085 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5086 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5087 } else { 5088 assert(vlen == 16, "sanity"); 5089 uint y_idx = ($idx$$constant >> 2) & 3; 5090 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5091 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 5092 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5093 } 5094 %} 5095 ins_pipe( pipe_slow ); 5096 %} 5097 5098 #ifdef _LP64 5099 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5100 predicate(Matcher::vector_length(n) == 2); 5101 match(Set dst (VectorInsert (Binary dst val) idx)); 5102 effect(TEMP tmp); 5103 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5104 ins_encode %{ 5105 assert(UseSSE >= 4, "sanity"); 5106 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5107 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5108 5109 __ movq($tmp$$Register, $val$$XMMRegister); 5110 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5111 %} 5112 ins_pipe( pipe_slow ); 5113 %} 5114 5115 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5116 predicate(Matcher::vector_length(n) == 4); 5117 match(Set dst (VectorInsert (Binary src val) idx)); 5118 effect(TEMP vtmp, TEMP tmp); 5119 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5120 ins_encode %{ 5121 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5122 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5123 5124 uint x_idx = $idx$$constant & right_n_bits(1); 5125 uint y_idx = ($idx$$constant >> 1) & 1; 5126 int vlen_enc = Assembler::AVX_256bit; 5127 __ movq($tmp$$Register, $val$$XMMRegister); 5128 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5129 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5130 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5131 %} 5132 ins_pipe( pipe_slow ); 5133 %} 5134 5135 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5136 predicate(Matcher::vector_length(n) == 8); 5137 match(Set dst (VectorInsert (Binary src val) idx)); 5138 effect(TEMP tmp, TEMP vtmp); 5139 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5140 ins_encode %{ 5141 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5142 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5143 5144 uint x_idx = $idx$$constant & right_n_bits(1); 5145 uint y_idx = ($idx$$constant >> 1) & 3; 5146 __ movq($tmp$$Register, $val$$XMMRegister); 5147 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5148 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5149 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5150 %} 5151 ins_pipe( pipe_slow ); 5152 %} 5153 #endif 5154 5155 // ====================REDUCTION ARITHMETIC======================================= 5156 5157 // =======================Int Reduction========================================== 5158 5159 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5160 
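  // src1 is the scalar accumulator carried in from the loop and src2 is the
  // vector input; the opcode-specific lane folding is delegated to the
  // reduceI() macro-assembler helper used in the encoding below.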
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 5161 match(Set dst (AddReductionVI src1 src2)); 5162 match(Set dst (MulReductionVI src1 src2)); 5163 match(Set dst (AndReductionV src1 src2)); 5164 match(Set dst ( OrReductionV src1 src2)); 5165 match(Set dst (XorReductionV src1 src2)); 5166 match(Set dst (MinReductionV src1 src2)); 5167 match(Set dst (MaxReductionV src1 src2)); 5168 effect(TEMP vtmp1, TEMP vtmp2); 5169 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5170 ins_encode %{ 5171 int opcode = this->ideal_Opcode(); 5172 int vlen = Matcher::vector_length(this, $src2); 5173 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5174 %} 5175 ins_pipe( pipe_slow ); 5176 %} 5177 5178 // =======================Long Reduction========================================== 5179 5180 #ifdef _LP64 5181 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5182 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5183 match(Set dst (AddReductionVL src1 src2)); 5184 match(Set dst (MulReductionVL src1 src2)); 5185 match(Set dst (AndReductionV src1 src2)); 5186 match(Set dst ( OrReductionV src1 src2)); 5187 match(Set dst (XorReductionV src1 src2)); 5188 match(Set dst (MinReductionV src1 src2)); 5189 match(Set dst (MaxReductionV src1 src2)); 5190 effect(TEMP vtmp1, TEMP vtmp2); 5191 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5192 ins_encode %{ 5193 int opcode = this->ideal_Opcode(); 5194 int vlen = Matcher::vector_length(this, $src2); 5195 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5196 %} 5197 ins_pipe( pipe_slow ); 5198 %} 5199 5200 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5201 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5202 match(Set dst (AddReductionVL src1 src2)); 5203 match(Set dst (MulReductionVL src1 src2)); 5204 match(Set dst (AndReductionV src1 src2)); 5205 match(Set dst ( OrReductionV src1 src2)); 5206 match(Set dst (XorReductionV src1 src2)); 5207 match(Set dst (MinReductionV src1 src2)); 5208 match(Set dst (MaxReductionV src1 src2)); 5209 effect(TEMP vtmp1, TEMP vtmp2); 5210 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5211 ins_encode %{ 5212 int opcode = this->ideal_Opcode(); 5213 int vlen = Matcher::vector_length(this, $src2); 5214 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5215 %} 5216 ins_pipe( pipe_slow ); 5217 %} 5218 #endif // _LP64 5219 5220 // =======================Float Reduction========================================== 5221 5222 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5223 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5224 match(Set dst (AddReductionVF dst src)); 5225 match(Set dst (MulReductionVF dst src)); 5226 effect(TEMP dst, TEMP vtmp); 5227 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5228 ins_encode %{ 5229 int opcode = this->ideal_Opcode(); 5230 int vlen = Matcher::vector_length(this, $src); 5231 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 
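// A worked example (illustrative, not from the source) of why the strictly
// ordered rules keep the left-to-right lane order while the unordered_* rules
// below may reassociate: for an accumulator of 0.0f and lanes
// {1e30f, -1e30f, 1.0f, 0.0f},
//   left-to-right : (((0.0f + 1e30f) + -1e30f) + 1.0f) + 0.0f   == 1.0f
//   reassociated  :  (0.0f + 1e30f) + ((-1e30f + 1.0f) + 0.0f)  == 0.0f
// because -1e30f + 1.0f rounds back to -1e30f. Auto-vectorized loops must
// preserve the source order (requires_strict_order()), whereas the Vector
// API's reduceLanes(ADD/MUL) tolerates the cheaper tree-shaped reduction
// performed by unordered_reduce_fp.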
5236 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5237 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5238 match(Set dst (AddReductionVF dst src)); 5239 match(Set dst (MulReductionVF dst src)); 5240 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5241 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5242 ins_encode %{ 5243 int opcode = this->ideal_Opcode(); 5244 int vlen = Matcher::vector_length(this, $src); 5245 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5246 %} 5247 ins_pipe( pipe_slow ); 5248 %} 5249 5250 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5251 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5252 match(Set dst (AddReductionVF dst src)); 5253 match(Set dst (MulReductionVF dst src)); 5254 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5255 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5256 ins_encode %{ 5257 int opcode = this->ideal_Opcode(); 5258 int vlen = Matcher::vector_length(this, $src); 5259 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5260 %} 5261 ins_pipe( pipe_slow ); 5262 %} 5263 5264 5265 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5266 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5267 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5268 // src1 contains reduction identity 5269 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5270 match(Set dst (AddReductionVF src1 src2)); 5271 match(Set dst (MulReductionVF src1 src2)); 5272 effect(TEMP dst); 5273 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5274 ins_encode %{ 5275 int opcode = this->ideal_Opcode(); 5276 int vlen = Matcher::vector_length(this, $src2); 5277 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5278 %} 5279 ins_pipe( pipe_slow ); 5280 %} 5281 5282 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5283 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5284 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5285 // src1 contains reduction identity 5286 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5287 match(Set dst (AddReductionVF src1 src2)); 5288 match(Set dst (MulReductionVF src1 src2)); 5289 effect(TEMP dst, TEMP vtmp); 5290 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5291 ins_encode %{ 5292 int opcode = this->ideal_Opcode(); 5293 int vlen = Matcher::vector_length(this, $src2); 5294 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5295 %} 5296 ins_pipe( pipe_slow ); 5297 %} 5298 5299 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5300 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5301 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5302 // src1 contains reduction identity 5303 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5304 match(Set dst (AddReductionVF src1 src2)); 5305 match(Set dst (MulReductionVF src1 src2)); 5306 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5307 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5308 ins_encode %{ 5309 int opcode = this->ideal_Opcode(); 5310 int vlen = Matcher::vector_length(this, $src2); 5311 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5312 %} 5313 ins_pipe( pipe_slow ); 5314 %} 5315 5316 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5317 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5318 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5319 // src1 contains reduction identity 5320 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5321 match(Set dst (AddReductionVF src1 src2)); 5322 match(Set dst (MulReductionVF src1 src2)); 5323 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5324 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5325 ins_encode %{ 5326 int opcode = this->ideal_Opcode(); 5327 int vlen = Matcher::vector_length(this, $src2); 5328 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5329 %} 5330 ins_pipe( pipe_slow ); 5331 %} 5332 5333 // =======================Double Reduction========================================== 5334 5335 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5336 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5337 match(Set dst (AddReductionVD dst src)); 5338 match(Set dst (MulReductionVD dst src)); 5339 effect(TEMP dst, TEMP vtmp); 5340 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5341 ins_encode %{ 5342 int opcode = this->ideal_Opcode(); 5343 int vlen = Matcher::vector_length(this, $src); 5344 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5345 %} 5346 ins_pipe( pipe_slow ); 5347 %} 5348 5349 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5350 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5351 match(Set dst (AddReductionVD dst src)); 5352 match(Set dst (MulReductionVD dst src)); 5353 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5354 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5355 ins_encode %{ 5356 int opcode = this->ideal_Opcode(); 5357 int vlen = Matcher::vector_length(this, $src); 5358 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5359 %} 5360 ins_pipe( pipe_slow ); 5361 %} 5362 5363 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5364 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5365 match(Set dst (AddReductionVD dst src)); 5366 match(Set dst (MulReductionVD dst src)); 5367 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5368 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5369 ins_encode %{ 5370 int opcode = this->ideal_Opcode(); 5371 int vlen = Matcher::vector_length(this, $src); 5372 __ 
reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5373 %} 5374 ins_pipe( pipe_slow ); 5375 %} 5376 5377 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5378 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5379 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5380 // src1 contains reduction identity 5381 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5382 match(Set dst (AddReductionVD src1 src2)); 5383 match(Set dst (MulReductionVD src1 src2)); 5384 effect(TEMP dst); 5385 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5386 ins_encode %{ 5387 int opcode = this->ideal_Opcode(); 5388 int vlen = Matcher::vector_length(this, $src2); 5389 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5395 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5396 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5397 // src1 contains reduction identity 5398 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5399 match(Set dst (AddReductionVD src1 src2)); 5400 match(Set dst (MulReductionVD src1 src2)); 5401 effect(TEMP dst, TEMP vtmp); 5402 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5403 ins_encode %{ 5404 int opcode = this->ideal_Opcode(); 5405 int vlen = Matcher::vector_length(this, $src2); 5406 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5407 %} 5408 ins_pipe( pipe_slow ); 5409 %} 5410 5411 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5412 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5413 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5414 // src1 contains reduction identity 5415 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5416 match(Set dst (AddReductionVD src1 src2)); 5417 match(Set dst (MulReductionVD src1 src2)); 5418 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5419 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5420 ins_encode %{ 5421 int opcode = this->ideal_Opcode(); 5422 int vlen = Matcher::vector_length(this, $src2); 5423 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5424 %} 5425 ins_pipe( pipe_slow ); 5426 %} 5427 5428 // =======================Byte Reduction========================================== 5429 5430 #ifdef _LP64 5431 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5432 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5433 match(Set dst (AddReductionVI src1 src2)); 5434 match(Set dst (AndReductionV src1 src2)); 5435 match(Set dst ( OrReductionV src1 src2)); 5436 match(Set dst (XorReductionV src1 src2)); 5437 match(Set dst (MinReductionV src1 src2)); 5438 match(Set dst (MaxReductionV src1 src2)); 5439 effect(TEMP vtmp1, TEMP vtmp2); 5440 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5441 ins_encode %{ 5442 int opcode = this->ideal_Opcode(); 5443 int vlen = Matcher::vector_length(this, $src2); 5444 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5445 %} 5446 ins_pipe( pipe_slow ); 5447 %} 5448 5449 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5450 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5451 match(Set dst (AddReductionVI src1 src2)); 5452 match(Set dst (AndReductionV src1 src2)); 5453 match(Set dst ( OrReductionV src1 src2)); 5454 match(Set dst (XorReductionV src1 src2)); 5455 match(Set dst (MinReductionV src1 src2)); 5456 match(Set dst (MaxReductionV src1 src2)); 5457 effect(TEMP vtmp1, TEMP vtmp2); 5458 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5459 ins_encode %{ 5460 int opcode = this->ideal_Opcode(); 5461 int vlen = Matcher::vector_length(this, $src2); 5462 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5463 %} 5464 ins_pipe( pipe_slow ); 5465 %} 5466 #endif 5467 5468 // =======================Short Reduction========================================== 5469 5470 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5471 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5472 match(Set dst (AddReductionVI src1 src2)); 5473 match(Set dst (MulReductionVI src1 src2)); 5474 match(Set dst (AndReductionV src1 src2)); 5475 match(Set dst ( OrReductionV src1 src2)); 5476 match(Set dst (XorReductionV src1 src2)); 5477 match(Set dst (MinReductionV src1 src2)); 5478 match(Set dst (MaxReductionV src1 src2)); 5479 effect(TEMP vtmp1, TEMP vtmp2); 5480 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5481 ins_encode %{ 5482 int opcode = this->ideal_Opcode(); 5483 int vlen = Matcher::vector_length(this, $src2); 5484 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 
5485 %} 5486 ins_pipe( pipe_slow ); 5487 %} 5488 5489 // =======================Mul Reduction========================================== 5490 5491 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5492 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5493 Matcher::vector_length(n->in(2)) <= 32); // src2 5494 match(Set dst (MulReductionVI src1 src2)); 5495 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5496 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5497 ins_encode %{ 5498 int opcode = this->ideal_Opcode(); 5499 int vlen = Matcher::vector_length(this, $src2); 5500 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5501 %} 5502 ins_pipe( pipe_slow ); 5503 %} 5504 5505 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5506 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5507 Matcher::vector_length(n->in(2)) == 64); // src2 5508 match(Set dst (MulReductionVI src1 src2)); 5509 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5510 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5511 ins_encode %{ 5512 int opcode = this->ideal_Opcode(); 5513 int vlen = Matcher::vector_length(this, $src2); 5514 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5515 %} 5516 ins_pipe( pipe_slow ); 5517 %} 5518 5519 //--------------------Min/Max Float Reduction -------------------- 5520 // Float Min Reduction 5521 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5522 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5523 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5524 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5525 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5526 Matcher::vector_length(n->in(2)) == 2); 5527 match(Set dst (MinReductionV src1 src2)); 5528 match(Set dst (MaxReductionV src1 src2)); 5529 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5530 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5531 ins_encode %{ 5532 assert(UseAVX > 0, "sanity"); 5533 5534 int opcode = this->ideal_Opcode(); 5535 int vlen = Matcher::vector_length(this, $src2); 5536 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5537 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5543 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5544 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5545 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5546 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5547 Matcher::vector_length(n->in(2)) >= 4); 5548 match(Set dst (MinReductionV src1 src2)); 5549 match(Set dst (MaxReductionV src1 src2)); 5550 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5551 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5552 ins_encode %{ 5553 assert(UseAVX > 0, "sanity"); 5554 5555 int opcode = 
this->ideal_Opcode(); 5556 int vlen = Matcher::vector_length(this, $src2); 5557 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5558 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5559 %} 5560 ins_pipe( pipe_slow ); 5561 %} 5562 5563 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5564 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5565 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5566 Matcher::vector_length(n->in(2)) == 2); 5567 match(Set dst (MinReductionV dst src)); 5568 match(Set dst (MaxReductionV dst src)); 5569 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5570 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5571 ins_encode %{ 5572 assert(UseAVX > 0, "sanity"); 5573 5574 int opcode = this->ideal_Opcode(); 5575 int vlen = Matcher::vector_length(this, $src); 5576 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5577 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5578 %} 5579 ins_pipe( pipe_slow ); 5580 %} 5581 5582 5583 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5584 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5585 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5586 Matcher::vector_length(n->in(2)) >= 4); 5587 match(Set dst (MinReductionV dst src)); 5588 match(Set dst (MaxReductionV dst src)); 5589 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5590 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5591 ins_encode %{ 5592 assert(UseAVX > 0, "sanity"); 5593 5594 int opcode = this->ideal_Opcode(); 5595 int vlen = Matcher::vector_length(this, $src); 5596 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5597 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5598 %} 5599 ins_pipe( pipe_slow ); 5600 %} 5601 5602 5603 //--------------------Min Double Reduction -------------------- 5604 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5605 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5606 rFlagsReg cr) %{ 5607 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5608 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5609 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5610 Matcher::vector_length(n->in(2)) == 2); 5611 match(Set dst (MinReductionV src1 src2)); 5612 match(Set dst (MaxReductionV src1 src2)); 5613 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5614 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5615 ins_encode %{ 5616 assert(UseAVX > 0, "sanity"); 5617 5618 int opcode = this->ideal_Opcode(); 5619 int vlen = Matcher::vector_length(this, $src2); 5620 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5621 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5622 %} 5623 ins_pipe( pipe_slow ); 5624 %} 5625 5626 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5627 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5628 rFlagsReg cr) %{ 5629 
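  // This rule fires only when src1 is the identity of the reduction (+Infinity
  // for Min, -Infinity for Max, as checked by the predicate below), so the
  // encoding can ignore src1 and reduce just the lanes of src2; the *_av
  // variants further down cover the dst-as-accumulator shape instead.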
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5630 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5631 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5632 Matcher::vector_length(n->in(2)) >= 4); 5633 match(Set dst (MinReductionV src1 src2)); 5634 match(Set dst (MaxReductionV src1 src2)); 5635 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5636 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5637 ins_encode %{ 5638 assert(UseAVX > 0, "sanity"); 5639 5640 int opcode = this->ideal_Opcode(); 5641 int vlen = Matcher::vector_length(this, $src2); 5642 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5643 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5644 %} 5645 ins_pipe( pipe_slow ); 5646 %} 5647 5648 5649 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5650 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5651 rFlagsReg cr) %{ 5652 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5653 Matcher::vector_length(n->in(2)) == 2); 5654 match(Set dst (MinReductionV dst src)); 5655 match(Set dst (MaxReductionV dst src)); 5656 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5657 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5658 ins_encode %{ 5659 assert(UseAVX > 0, "sanity"); 5660 5661 int opcode = this->ideal_Opcode(); 5662 int vlen = Matcher::vector_length(this, $src); 5663 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5664 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 instruct minmax_reductionD_av(legRegD dst, legVec src, 5670 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5671 rFlagsReg cr) %{ 5672 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5673 Matcher::vector_length(n->in(2)) >= 4); 5674 match(Set dst (MinReductionV dst src)); 5675 match(Set dst (MaxReductionV dst src)); 5676 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5677 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5678 ins_encode %{ 5679 assert(UseAVX > 0, "sanity"); 5680 5681 int opcode = this->ideal_Opcode(); 5682 int vlen = Matcher::vector_length(this, $src); 5683 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5684 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5685 %} 5686 ins_pipe( pipe_slow ); 5687 %} 5688 5689 // ====================VECTOR ARITHMETIC======================================= 5690 5691 // --------------------------------- ADD -------------------------------------- 5692 5693 // Bytes vector add 5694 instruct vaddB(vec dst, vec src) %{ 5695 predicate(UseAVX == 0); 5696 match(Set dst (AddVB dst src)); 5697 format %{ "paddb $dst,$src\t! add packedB" %} 5698 ins_encode %{ 5699 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5700 %} 5701 ins_pipe( pipe_slow ); 5702 %} 5703 5704 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5705 predicate(UseAVX > 0); 5706 match(Set dst (AddVB src1 src2)); 5707 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5708 ins_encode %{ 5709 int vlen_enc = vector_length_encoding(this); 5710 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5711 %} 5712 ins_pipe( pipe_slow ); 5713 %} 5714 5715 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5716 predicate((UseAVX > 0) && 5717 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5718 match(Set dst (AddVB src (LoadVector mem))); 5719 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5720 ins_encode %{ 5721 int vlen_enc = vector_length_encoding(this); 5722 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5723 %} 5724 ins_pipe( pipe_slow ); 5725 %} 5726 5727 // Shorts/Chars vector add 5728 instruct vaddS(vec dst, vec src) %{ 5729 predicate(UseAVX == 0); 5730 match(Set dst (AddVS dst src)); 5731 format %{ "paddw $dst,$src\t! add packedS" %} 5732 ins_encode %{ 5733 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5734 %} 5735 ins_pipe( pipe_slow ); 5736 %} 5737 5738 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5739 predicate(UseAVX > 0); 5740 match(Set dst (AddVS src1 src2)); 5741 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5742 ins_encode %{ 5743 int vlen_enc = vector_length_encoding(this); 5744 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5745 %} 5746 ins_pipe( pipe_slow ); 5747 %} 5748 5749 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5750 predicate((UseAVX > 0) && 5751 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5752 match(Set dst (AddVS src (LoadVector mem))); 5753 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5754 ins_encode %{ 5755 int vlen_enc = vector_length_encoding(this); 5756 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5757 %} 5758 ins_pipe( pipe_slow ); 5759 %} 5760 5761 // Integers vector add 5762 instruct vaddI(vec dst, vec src) %{ 5763 predicate(UseAVX == 0); 5764 match(Set dst (AddVI dst src)); 5765 format %{ "paddd $dst,$src\t! add packedI" %} 5766 ins_encode %{ 5767 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5768 %} 5769 ins_pipe( pipe_slow ); 5770 %} 5771 5772 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5773 predicate(UseAVX > 0); 5774 match(Set dst (AddVI src1 src2)); 5775 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5776 ins_encode %{ 5777 int vlen_enc = vector_length_encoding(this); 5778 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5779 %} 5780 ins_pipe( pipe_slow ); 5781 %} 5782 5783 5784 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5785 predicate((UseAVX > 0) && 5786 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5787 match(Set dst (AddVI src (LoadVector mem))); 5788 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5789 ins_encode %{ 5790 int vlen_enc = vector_length_encoding(this); 5791 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5792 %} 5793 ins_pipe( pipe_slow ); 5794 %} 5795 5796 // Longs vector add 5797 instruct vaddL(vec dst, vec src) %{ 5798 predicate(UseAVX == 0); 5799 match(Set dst (AddVL dst src)); 5800 format %{ "paddq $dst,$src\t! add packedL" %} 5801 ins_encode %{ 5802 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5803 %} 5804 ins_pipe( pipe_slow ); 5805 %} 5806 5807 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5808 predicate(UseAVX > 0); 5809 match(Set dst (AddVL src1 src2)); 5810 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5811 ins_encode %{ 5812 int vlen_enc = vector_length_encoding(this); 5813 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5814 %} 5815 ins_pipe( pipe_slow ); 5816 %} 5817 5818 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5819 predicate((UseAVX > 0) && 5820 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5821 match(Set dst (AddVL src (LoadVector mem))); 5822 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5823 ins_encode %{ 5824 int vlen_enc = vector_length_encoding(this); 5825 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5826 %} 5827 ins_pipe( pipe_slow ); 5828 %} 5829 5830 // Floats vector add 5831 instruct vaddF(vec dst, vec src) %{ 5832 predicate(UseAVX == 0); 5833 match(Set dst (AddVF dst src)); 5834 format %{ "addps $dst,$src\t! add packedF" %} 5835 ins_encode %{ 5836 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5837 %} 5838 ins_pipe( pipe_slow ); 5839 %} 5840 5841 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5842 predicate(UseAVX > 0); 5843 match(Set dst (AddVF src1 src2)); 5844 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5845 ins_encode %{ 5846 int vlen_enc = vector_length_encoding(this); 5847 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5848 %} 5849 ins_pipe( pipe_slow ); 5850 %} 5851 5852 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5853 predicate((UseAVX > 0) && 5854 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5855 match(Set dst (AddVF src (LoadVector mem))); 5856 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5857 ins_encode %{ 5858 int vlen_enc = vector_length_encoding(this); 5859 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5860 %} 5861 ins_pipe( pipe_slow ); 5862 %} 5863 5864 // Doubles vector add 5865 instruct vaddD(vec dst, vec src) %{ 5866 predicate(UseAVX == 0); 5867 match(Set dst (AddVD dst src)); 5868 format %{ "addpd $dst,$src\t! add packedD" %} 5869 ins_encode %{ 5870 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5871 %} 5872 ins_pipe( pipe_slow ); 5873 %} 5874 5875 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5876 predicate(UseAVX > 0); 5877 match(Set dst (AddVD src1 src2)); 5878 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5879 ins_encode %{ 5880 int vlen_enc = vector_length_encoding(this); 5881 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5882 %} 5883 ins_pipe( pipe_slow ); 5884 %} 5885 5886 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5887 predicate((UseAVX > 0) && 5888 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5889 match(Set dst (AddVD src (LoadVector mem))); 5890 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5891 ins_encode %{ 5892 int vlen_enc = vector_length_encoding(this); 5893 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5894 %} 5895 ins_pipe( pipe_slow ); 5896 %} 5897 5898 // --------------------------------- SUB -------------------------------------- 5899 5900 // Bytes vector sub 5901 instruct vsubB(vec dst, vec src) %{ 5902 predicate(UseAVX == 0); 5903 match(Set dst (SubVB dst src)); 5904 format %{ "psubb $dst,$src\t! sub packedB" %} 5905 ins_encode %{ 5906 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5907 %} 5908 ins_pipe( pipe_slow ); 5909 %} 5910 5911 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5912 predicate(UseAVX > 0); 5913 match(Set dst (SubVB src1 src2)); 5914 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5915 ins_encode %{ 5916 int vlen_enc = vector_length_encoding(this); 5917 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5918 %} 5919 ins_pipe( pipe_slow ); 5920 %} 5921 5922 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5923 predicate((UseAVX > 0) && 5924 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5925 match(Set dst (SubVB src (LoadVector mem))); 5926 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5927 ins_encode %{ 5928 int vlen_enc = vector_length_encoding(this); 5929 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5930 %} 5931 ins_pipe( pipe_slow ); 5932 %} 5933 5934 // Shorts/Chars vector sub 5935 instruct vsubS(vec dst, vec src) %{ 5936 predicate(UseAVX == 0); 5937 match(Set dst (SubVS dst src)); 5938 format %{ "psubw $dst,$src\t! sub packedS" %} 5939 ins_encode %{ 5940 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5941 %} 5942 ins_pipe( pipe_slow ); 5943 %} 5944 5945 5946 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5947 predicate(UseAVX > 0); 5948 match(Set dst (SubVS src1 src2)); 5949 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5950 ins_encode %{ 5951 int vlen_enc = vector_length_encoding(this); 5952 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5953 %} 5954 ins_pipe( pipe_slow ); 5955 %} 5956 5957 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5958 predicate((UseAVX > 0) && 5959 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5960 match(Set dst (SubVS src (LoadVector mem))); 5961 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5962 ins_encode %{ 5963 int vlen_enc = vector_length_encoding(this); 5964 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5965 %} 5966 ins_pipe( pipe_slow ); 5967 %} 5968 5969 // Integers vector sub 5970 instruct vsubI(vec dst, vec src) %{ 5971 predicate(UseAVX == 0); 5972 match(Set dst (SubVI dst src)); 5973 format %{ "psubd $dst,$src\t! sub packedI" %} 5974 ins_encode %{ 5975 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5976 %} 5977 ins_pipe( pipe_slow ); 5978 %} 5979 5980 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5981 predicate(UseAVX > 0); 5982 match(Set dst (SubVI src1 src2)); 5983 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5984 ins_encode %{ 5985 int vlen_enc = vector_length_encoding(this); 5986 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5987 %} 5988 ins_pipe( pipe_slow ); 5989 %} 5990 5991 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5992 predicate((UseAVX > 0) && 5993 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5994 match(Set dst (SubVI src (LoadVector mem))); 5995 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5996 ins_encode %{ 5997 int vlen_enc = vector_length_encoding(this); 5998 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5999 %} 6000 ins_pipe( pipe_slow ); 6001 %} 6002 6003 // Longs vector sub 6004 instruct vsubL(vec dst, vec src) %{ 6005 predicate(UseAVX == 0); 6006 match(Set dst (SubVL dst src)); 6007 format %{ "psubq $dst,$src\t! sub packedL" %} 6008 ins_encode %{ 6009 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 6010 %} 6011 ins_pipe( pipe_slow ); 6012 %} 6013 6014 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 6015 predicate(UseAVX > 0); 6016 match(Set dst (SubVL src1 src2)); 6017 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 6018 ins_encode %{ 6019 int vlen_enc = vector_length_encoding(this); 6020 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 6026 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 6027 predicate((UseAVX > 0) && 6028 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6029 match(Set dst (SubVL src (LoadVector mem))); 6030 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 6031 ins_encode %{ 6032 int vlen_enc = vector_length_encoding(this); 6033 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6034 %} 6035 ins_pipe( pipe_slow ); 6036 %} 6037 6038 // Floats vector sub 6039 instruct vsubF(vec dst, vec src) %{ 6040 predicate(UseAVX == 0); 6041 match(Set dst (SubVF dst src)); 6042 format %{ "subps $dst,$src\t! sub packedF" %} 6043 ins_encode %{ 6044 __ subps($dst$$XMMRegister, $src$$XMMRegister); 6045 %} 6046 ins_pipe( pipe_slow ); 6047 %} 6048 6049 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 6050 predicate(UseAVX > 0); 6051 match(Set dst (SubVF src1 src2)); 6052 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 6053 ins_encode %{ 6054 int vlen_enc = vector_length_encoding(this); 6055 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6056 %} 6057 ins_pipe( pipe_slow ); 6058 %} 6059 6060 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 6061 predicate((UseAVX > 0) && 6062 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6063 match(Set dst (SubVF src (LoadVector mem))); 6064 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 6065 ins_encode %{ 6066 int vlen_enc = vector_length_encoding(this); 6067 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6068 %} 6069 ins_pipe( pipe_slow ); 6070 %} 6071 6072 // Doubles vector sub 6073 instruct vsubD(vec dst, vec src) %{ 6074 predicate(UseAVX == 0); 6075 match(Set dst (SubVD dst src)); 6076 format %{ "subpd $dst,$src\t! sub packedD" %} 6077 ins_encode %{ 6078 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6079 %} 6080 ins_pipe( pipe_slow ); 6081 %} 6082 6083 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6084 predicate(UseAVX > 0); 6085 match(Set dst (SubVD src1 src2)); 6086 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6087 ins_encode %{ 6088 int vlen_enc = vector_length_encoding(this); 6089 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6090 %} 6091 ins_pipe( pipe_slow ); 6092 %} 6093 6094 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6095 predicate((UseAVX > 0) && 6096 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6097 match(Set dst (SubVD src (LoadVector mem))); 6098 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6099 ins_encode %{ 6100 int vlen_enc = vector_length_encoding(this); 6101 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6102 %} 6103 ins_pipe( pipe_slow ); 6104 %} 6105 6106 // --------------------------------- MUL -------------------------------------- 6107 6108 // Byte vector mul 6109 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6110 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6111 match(Set dst (MulVB src1 src2)); 6112 effect(TEMP dst, TEMP xtmp); 6113 format %{ "mulVB $dst, $src1, $src2\t! 
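  // There is no packed byte multiply on x86, so the byte rules below widen the
  // operands to 16-bit words (pmovsxbw, or word shifts for the longer vectors),
  // multiply with pmullw/vpmullw, mask the products back down to their low bytes
  // and re-pack with packuswb. A sketch of the per-element math, assuming
  // lane-wise 8-bit wraparound semantics:
  //   (byte)(a * b) == (byte)((short)a * (short)b)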
using $xtmp as TEMP" %} 6114 ins_encode %{ 6115 assert(UseSSE > 3, "required"); 6116 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6117 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6118 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6119 __ psllw($dst$$XMMRegister, 8); 6120 __ psrlw($dst$$XMMRegister, 8); 6121 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6122 %} 6123 ins_pipe( pipe_slow ); 6124 %} 6125 6126 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6127 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6128 match(Set dst (MulVB src1 src2)); 6129 effect(TEMP dst, TEMP xtmp); 6130 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6131 ins_encode %{ 6132 assert(UseSSE > 3, "required"); 6133 // Odd-index elements 6134 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6135 __ psrlw($dst$$XMMRegister, 8); 6136 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6137 __ psrlw($xtmp$$XMMRegister, 8); 6138 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6139 __ psllw($dst$$XMMRegister, 8); 6140 // Even-index elements 6141 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6142 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6143 __ psllw($xtmp$$XMMRegister, 8); 6144 __ psrlw($xtmp$$XMMRegister, 8); 6145 // Combine 6146 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6147 %} 6148 ins_pipe( pipe_slow ); 6149 %} 6150 6151 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6152 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6153 match(Set dst (MulVB src1 src2)); 6154 effect(TEMP xtmp1, TEMP xtmp2); 6155 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6156 ins_encode %{ 6157 int vlen_enc = vector_length_encoding(this); 6158 // Odd-index elements 6159 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6160 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6161 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6162 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6163 // Even-index elements 6164 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6165 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6166 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6167 // Combine 6168 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6169 %} 6170 ins_pipe( pipe_slow ); 6171 %} 6172 6173 // Shorts/Chars vector mul 6174 instruct vmulS(vec dst, vec src) %{ 6175 predicate(UseAVX == 0); 6176 match(Set dst (MulVS dst src)); 6177 format %{ "pmullw $dst,$src\t! mul packedS" %} 6178 ins_encode %{ 6179 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6180 %} 6181 ins_pipe( pipe_slow ); 6182 %} 6183 6184 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6185 predicate(UseAVX > 0); 6186 match(Set dst (MulVS src1 src2)); 6187 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6188 ins_encode %{ 6189 int vlen_enc = vector_length_encoding(this); 6190 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6191 %} 6192 ins_pipe( pipe_slow ); 6193 %} 6194 6195 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6196 predicate((UseAVX > 0) && 6197 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6198 match(Set dst (MulVS src (LoadVector mem))); 6199 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6200 ins_encode %{ 6201 int vlen_enc = vector_length_encoding(this); 6202 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6203 %} 6204 ins_pipe( pipe_slow ); 6205 %} 6206 6207 // Integers vector mul 6208 instruct vmulI(vec dst, vec src) %{ 6209 predicate(UseAVX == 0); 6210 match(Set dst (MulVI dst src)); 6211 format %{ "pmulld $dst,$src\t! mul packedI" %} 6212 ins_encode %{ 6213 assert(UseSSE > 3, "required"); 6214 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6215 %} 6216 ins_pipe( pipe_slow ); 6217 %} 6218 6219 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6220 predicate(UseAVX > 0); 6221 match(Set dst (MulVI src1 src2)); 6222 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6223 ins_encode %{ 6224 int vlen_enc = vector_length_encoding(this); 6225 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6226 %} 6227 ins_pipe( pipe_slow ); 6228 %} 6229 6230 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6231 predicate((UseAVX > 0) && 6232 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6233 match(Set dst (MulVI src (LoadVector mem))); 6234 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6235 ins_encode %{ 6236 int vlen_enc = vector_length_encoding(this); 6237 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6238 %} 6239 ins_pipe( pipe_slow ); 6240 %} 6241 6242 // Longs vector mul 6243 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6244 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6245 VM_Version::supports_avx512dq()) || 6246 VM_Version::supports_avx512vldq()); 6247 match(Set dst (MulVL src1 src2)); 6248 ins_cost(500); 6249 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6250 ins_encode %{ 6251 assert(UseAVX > 2, "required"); 6252 int vlen_enc = vector_length_encoding(this); 6253 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6254 %} 6255 ins_pipe( pipe_slow ); 6256 %} 6257 6258 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6259 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6260 VM_Version::supports_avx512dq()) || 6261 (Matcher::vector_length_in_bytes(n) > 8 && 6262 VM_Version::supports_avx512vldq())); 6263 match(Set dst (MulVL src (LoadVector mem))); 6264 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6265 ins_cost(500); 6266 ins_encode %{ 6267 assert(UseAVX > 2, "required"); 6268 int vlen_enc = vector_length_encoding(this); 6269 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6270 %} 6271 ins_pipe( pipe_slow ); 6272 %} 6273 6274 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6275 predicate(UseAVX == 0); 6276 match(Set dst (MulVL src1 src2)); 6277 ins_cost(500); 6278 effect(TEMP dst, TEMP xtmp); 6279 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // 64x64->64 bit multiply out of 32-bit pieces:
    //   (a_hi:a_lo) * (b_hi:b_lo) mod 2^64 == ((a_hi*b_lo + a_lo*b_hi) << 32) + a_lo*b_lo
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  ins_cost(500);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Same decomposition as above, using the AVX three-operand forms.
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// If both inputs are known to be zero-extended (uint) or sign-extended (int)
// 32-bit values, a single widening multiply already yields the full 64-bit product.
instruct vmuludq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_uint_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuludq $dst,$src1,$src2\t! muludq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuludq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuldq_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0 && n->as_MulVL()->has_int_inputs());
  match(Set dst (MulVL src1 src2));
  ins_cost(100);
  format %{ "vpmuldq $dst,$src1,$src2\t! muldq packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmuldq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t!
mul packedF" %} 6360 ins_encode %{ 6361 int vlen_enc = vector_length_encoding(this); 6362 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6363 %} 6364 ins_pipe( pipe_slow ); 6365 %} 6366 6367 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 6368 predicate((UseAVX > 0) && 6369 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6370 match(Set dst (MulVF src (LoadVector mem))); 6371 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 6372 ins_encode %{ 6373 int vlen_enc = vector_length_encoding(this); 6374 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6375 %} 6376 ins_pipe( pipe_slow ); 6377 %} 6378 6379 // Doubles vector mul 6380 instruct vmulD(vec dst, vec src) %{ 6381 predicate(UseAVX == 0); 6382 match(Set dst (MulVD dst src)); 6383 format %{ "mulpd $dst,$src\t! mul packedD" %} 6384 ins_encode %{ 6385 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 6386 %} 6387 ins_pipe( pipe_slow ); 6388 %} 6389 6390 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 6391 predicate(UseAVX > 0); 6392 match(Set dst (MulVD src1 src2)); 6393 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 6394 ins_encode %{ 6395 int vlen_enc = vector_length_encoding(this); 6396 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6397 %} 6398 ins_pipe( pipe_slow ); 6399 %} 6400 6401 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6402 predicate((UseAVX > 0) && 6403 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6404 match(Set dst (MulVD src (LoadVector mem))); 6405 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6406 ins_encode %{ 6407 int vlen_enc = vector_length_encoding(this); 6408 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6409 %} 6410 ins_pipe( pipe_slow ); 6411 %} 6412 6413 // --------------------------------- DIV -------------------------------------- 6414 6415 // Floats vector div 6416 instruct vdivF(vec dst, vec src) %{ 6417 predicate(UseAVX == 0); 6418 match(Set dst (DivVF dst src)); 6419 format %{ "divps $dst,$src\t! div packedF" %} 6420 ins_encode %{ 6421 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6422 %} 6423 ins_pipe( pipe_slow ); 6424 %} 6425 6426 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6427 predicate(UseAVX > 0); 6428 match(Set dst (DivVF src1 src2)); 6429 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6430 ins_encode %{ 6431 int vlen_enc = vector_length_encoding(this); 6432 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6433 %} 6434 ins_pipe( pipe_slow ); 6435 %} 6436 6437 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6438 predicate((UseAVX > 0) && 6439 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6440 match(Set dst (DivVF src (LoadVector mem))); 6441 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6442 ins_encode %{ 6443 int vlen_enc = vector_length_encoding(this); 6444 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6445 %} 6446 ins_pipe( pipe_slow ); 6447 %} 6448 6449 // Doubles vector div 6450 instruct vdivD(vec dst, vec src) %{ 6451 predicate(UseAVX == 0); 6452 match(Set dst (DivVD dst src)); 6453 format %{ "divpd $dst,$src\t! div packedD" %} 6454 ins_encode %{ 6455 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6456 %} 6457 ins_pipe( pipe_slow ); 6458 %} 6459 6460 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6461 predicate(UseAVX > 0); 6462 match(Set dst (DivVD src1 src2)); 6463 format %{ "vdivpd $dst,$src1,$src2\t! 
div packedD" %} 6464 ins_encode %{ 6465 int vlen_enc = vector_length_encoding(this); 6466 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6467 %} 6468 ins_pipe( pipe_slow ); 6469 %} 6470 6471 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6472 predicate((UseAVX > 0) && 6473 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6474 match(Set dst (DivVD src (LoadVector mem))); 6475 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6476 ins_encode %{ 6477 int vlen_enc = vector_length_encoding(this); 6478 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6479 %} 6480 ins_pipe( pipe_slow ); 6481 %} 6482 6483 // ------------------------------ MinMax --------------------------------------- 6484 6485 // Byte, Short, Int vector Min/Max 6486 instruct minmax_reg_sse(vec dst, vec src) %{ 6487 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6488 UseAVX == 0); 6489 match(Set dst (MinV dst src)); 6490 match(Set dst (MaxV dst src)); 6491 format %{ "vector_minmax $dst,$src\t! " %} 6492 ins_encode %{ 6493 assert(UseSSE >= 4, "required"); 6494 6495 int opcode = this->ideal_Opcode(); 6496 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6497 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6498 %} 6499 ins_pipe( pipe_slow ); 6500 %} 6501 6502 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6503 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6504 UseAVX > 0); 6505 match(Set dst (MinV src1 src2)); 6506 match(Set dst (MaxV src1 src2)); 6507 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6508 ins_encode %{ 6509 int opcode = this->ideal_Opcode(); 6510 int vlen_enc = vector_length_encoding(this); 6511 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6512 6513 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6514 %} 6515 ins_pipe( pipe_slow ); 6516 %} 6517 6518 // Long vector Min/Max 6519 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6520 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6521 UseAVX == 0); 6522 match(Set dst (MinV dst src)); 6523 match(Set dst (MaxV src dst)); 6524 effect(TEMP dst, TEMP tmp); 6525 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6526 ins_encode %{ 6527 assert(UseSSE >= 4, "required"); 6528 6529 int opcode = this->ideal_Opcode(); 6530 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6531 assert(elem_bt == T_LONG, "sanity"); 6532 6533 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6534 %} 6535 ins_pipe( pipe_slow ); 6536 %} 6537 6538 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6539 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6540 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6541 match(Set dst (MinV src1 src2)); 6542 match(Set dst (MaxV src1 src2)); 6543 effect(TEMP dst); 6544 format %{ "vector_minmaxL $dst,$src1,$src2\t! 
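  // Note on the long min/max rules: pre-AVX512 x86 has no packed min/max for
  // 64-bit lanes, so pminmax/vpminmax are expected to lower T_LONG to a compare
  // plus a blend. In the SSE rule above the TEMP is pinned to xmm0 (rxmm0),
  // presumably because the non-VEX SSE4.1 blend instructions take their mask
  // implicitly in xmm0. The EVEX rule further down can rely on the native
  // vpminsq/vpmaxsq forms (an assumption based on the AVX-512 instruction set,
  // not spelled out here).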
" %} 6545 ins_encode %{ 6546 int vlen_enc = vector_length_encoding(this); 6547 int opcode = this->ideal_Opcode(); 6548 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6549 assert(elem_bt == T_LONG, "sanity"); 6550 6551 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6552 %} 6553 ins_pipe( pipe_slow ); 6554 %} 6555 6556 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6557 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6558 Matcher::vector_element_basic_type(n) == T_LONG); 6559 match(Set dst (MinV src1 src2)); 6560 match(Set dst (MaxV src1 src2)); 6561 format %{ "vector_minmaxL $dst,$src1,src2\t! " %} 6562 ins_encode %{ 6563 assert(UseAVX > 2, "required"); 6564 6565 int vlen_enc = vector_length_encoding(this); 6566 int opcode = this->ideal_Opcode(); 6567 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6568 assert(elem_bt == T_LONG, "sanity"); 6569 6570 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6571 %} 6572 ins_pipe( pipe_slow ); 6573 %} 6574 6575 // Float/Double vector Min/Max 6576 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6577 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6578 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6579 UseAVX > 0); 6580 match(Set dst (MinV a b)); 6581 match(Set dst (MaxV a b)); 6582 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6583 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6584 ins_encode %{ 6585 assert(UseAVX > 0, "required"); 6586 6587 int opcode = this->ideal_Opcode(); 6588 int vlen_enc = vector_length_encoding(this); 6589 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6590 6591 __ vminmax_fp(opcode, elem_bt, 6592 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6593 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6594 %} 6595 ins_pipe( pipe_slow ); 6596 %} 6597 6598 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6599 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6600 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6601 match(Set dst (MinV a b)); 6602 match(Set dst (MaxV a b)); 6603 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6604 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6605 ins_encode %{ 6606 assert(UseAVX > 2, "required"); 6607 6608 int opcode = this->ideal_Opcode(); 6609 int vlen_enc = vector_length_encoding(this); 6610 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6611 6612 __ evminmax_fp(opcode, elem_bt, 6613 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6614 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6615 %} 6616 ins_pipe( pipe_slow ); 6617 %} 6618 6619 // ------------------------------ Unsigned vector Min/Max ---------------------- 6620 6621 instruct vector_uminmax_reg(vec dst, vec a, vec b) %{ 6622 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6623 match(Set dst (UMinV a b)); 6624 match(Set dst (UMaxV a b)); 6625 format %{ "vector_uminmax $dst,$a,$b\t!" 
%} 6626 ins_encode %{ 6627 int opcode = this->ideal_Opcode(); 6628 int vlen_enc = vector_length_encoding(this); 6629 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6630 assert(is_integral_type(elem_bt), ""); 6631 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, vlen_enc); 6632 %} 6633 ins_pipe( pipe_slow ); 6634 %} 6635 6636 instruct vector_uminmax_mem(vec dst, vec a, memory b) %{ 6637 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_LONG); 6638 match(Set dst (UMinV a (LoadVector b))); 6639 match(Set dst (UMaxV a (LoadVector b))); 6640 format %{ "vector_uminmax $dst,$a,$b\t!" %} 6641 ins_encode %{ 6642 int opcode = this->ideal_Opcode(); 6643 int vlen_enc = vector_length_encoding(this); 6644 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6645 assert(is_integral_type(elem_bt), ""); 6646 __ vpuminmax(opcode, elem_bt, $dst$$XMMRegister, $a$$XMMRegister, $b$$Address, vlen_enc); 6647 %} 6648 ins_pipe( pipe_slow ); 6649 %} 6650 6651 instruct vector_uminmaxq_reg(vec dst, vec a, vec b, vec xtmp1, vec xtmp2) %{ 6652 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_LONG); 6653 match(Set dst (UMinV a b)); 6654 match(Set dst (UMaxV a b)); 6655 effect(TEMP xtmp1, TEMP xtmp2); 6656 format %{ "vector_uminmaxq $dst,$a,$b\t! using xtmp1 and xtmp2 as TEMP" %} 6657 ins_encode %{ 6658 int opcode = this->ideal_Opcode(); 6659 int vlen_enc = vector_length_encoding(this); 6660 __ vpuminmaxq(opcode, $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6661 %} 6662 ins_pipe( pipe_slow ); 6663 %} 6664 6665 instruct vector_uminmax_reg_masked(vec dst, vec src2, kReg mask) %{ 6666 match(Set dst (UMinV (Binary dst src2) mask)); 6667 match(Set dst (UMaxV (Binary dst src2) mask)); 6668 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! umin/max masked operation" %} 6669 ins_encode %{ 6670 int vlen_enc = vector_length_encoding(this); 6671 BasicType bt = Matcher::vector_element_basic_type(this); 6672 int opc = this->ideal_Opcode(); 6673 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6674 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 6675 %} 6676 ins_pipe( pipe_slow ); 6677 %} 6678 6679 instruct vector_uminmax_mem_masked(vec dst, memory src2, kReg mask) %{ 6680 match(Set dst (UMinV (Binary dst (LoadVector src2)) mask)); 6681 match(Set dst (UMaxV (Binary dst (LoadVector src2)) mask)); 6682 format %{ "vector_uminmax_masked $dst, $dst, $src2, $mask\t! 
umin/max masked operation" %} 6683 ins_encode %{ 6684 int vlen_enc = vector_length_encoding(this); 6685 BasicType bt = Matcher::vector_element_basic_type(this); 6686 int opc = this->ideal_Opcode(); 6687 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 6688 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 6689 %} 6690 ins_pipe( pipe_slow ); 6691 %} 6692 6693 // --------------------------------- Signum/CopySign --------------------------- 6694 6695 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6696 match(Set dst (SignumF dst (Binary zero one))); 6697 effect(KILL cr); 6698 format %{ "signumF $dst, $dst" %} 6699 ins_encode %{ 6700 int opcode = this->ideal_Opcode(); 6701 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6702 %} 6703 ins_pipe( pipe_slow ); 6704 %} 6705 6706 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6707 match(Set dst (SignumD dst (Binary zero one))); 6708 effect(KILL cr); 6709 format %{ "signumD $dst, $dst" %} 6710 ins_encode %{ 6711 int opcode = this->ideal_Opcode(); 6712 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6713 %} 6714 ins_pipe( pipe_slow ); 6715 %} 6716 6717 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6718 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6719 match(Set dst (SignumVF src (Binary zero one))); 6720 match(Set dst (SignumVD src (Binary zero one))); 6721 effect(TEMP dst, TEMP xtmp1); 6722 format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %} 6723 ins_encode %{ 6724 int opcode = this->ideal_Opcode(); 6725 int vec_enc = vector_length_encoding(this); 6726 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6727 $xtmp1$$XMMRegister, vec_enc); 6728 %} 6729 ins_pipe( pipe_slow ); 6730 %} 6731 6732 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6733 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6734 match(Set dst (SignumVF src (Binary zero one))); 6735 match(Set dst (SignumVD src (Binary zero one))); 6736 effect(TEMP dst, TEMP ktmp1); 6737 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6738 ins_encode %{ 6739 int opcode = this->ideal_Opcode(); 6740 int vec_enc = vector_length_encoding(this); 6741 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6742 $ktmp1$$KRegister, vec_enc); 6743 %} 6744 ins_pipe( pipe_slow ); 6745 %} 6746 6747 // --------------------------------------- 6748 // For copySign use 0xE4 as writemask for vpternlog 6749 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6750 // C (xmm2) is set to 0x7FFFFFFF 6751 // Wherever xmm2 is 0, we want to pick from B (sign) 6752 // Wherever xmm2 is 1, we want to pick from A (src) 6753 // 6754 // A B C Result 6755 // 0 0 0 0 6756 // 0 0 1 0 6757 // 0 1 0 1 6758 // 0 1 1 0 6759 // 1 0 0 0 6760 // 1 0 1 1 6761 // 1 1 0 1 6762 // 1 1 1 1 6763 // 6764 // Result going from high bit to low bit is 0x11100100 = 0xe4 6765 // --------------------------------------- 6766 6767 #ifdef _LP64 6768 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6769 match(Set dst (CopySignF dst src)); 6770 effect(TEMP tmp1, TEMP tmp2); 6771 format %{ "CopySignF $dst, $src\t! 
using $tmp1 and $tmp2 as TEMP" %} 6772 ins_encode %{ 6773 __ movl($tmp2$$Register, 0x7FFFFFFF); 6774 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6775 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6776 %} 6777 ins_pipe( pipe_slow ); 6778 %} 6779 6780 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6781 match(Set dst (CopySignD dst (Binary src zero))); 6782 ins_cost(100); 6783 effect(TEMP tmp1, TEMP tmp2); 6784 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6785 ins_encode %{ 6786 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6787 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6788 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6789 %} 6790 ins_pipe( pipe_slow ); 6791 %} 6792 6793 #endif // _LP64 6794 6795 //----------------------------- CompressBits/ExpandBits ------------------------ 6796 6797 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6798 predicate(n->bottom_type()->isa_int()); 6799 match(Set dst (CompressBits src mask)); 6800 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6801 ins_encode %{ 6802 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6803 %} 6804 ins_pipe( pipe_slow ); 6805 %} 6806 6807 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6808 predicate(n->bottom_type()->isa_int()); 6809 match(Set dst (ExpandBits src mask)); 6810 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6811 ins_encode %{ 6812 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6813 %} 6814 ins_pipe( pipe_slow ); 6815 %} 6816 6817 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6818 predicate(n->bottom_type()->isa_int()); 6819 match(Set dst (CompressBits src (LoadI mask))); 6820 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6821 ins_encode %{ 6822 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6823 %} 6824 ins_pipe( pipe_slow ); 6825 %} 6826 6827 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6828 predicate(n->bottom_type()->isa_int()); 6829 match(Set dst (ExpandBits src (LoadI mask))); 6830 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6831 ins_encode %{ 6832 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6833 %} 6834 ins_pipe( pipe_slow ); 6835 %} 6836 6837 // --------------------------------- Sqrt -------------------------------------- 6838 6839 instruct vsqrtF_reg(vec dst, vec src) %{ 6840 match(Set dst (SqrtVF src)); 6841 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6842 ins_encode %{ 6843 assert(UseAVX > 0, "required"); 6844 int vlen_enc = vector_length_encoding(this); 6845 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6846 %} 6847 ins_pipe( pipe_slow ); 6848 %} 6849 6850 instruct vsqrtF_mem(vec dst, memory mem) %{ 6851 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6852 match(Set dst (SqrtVF (LoadVector mem))); 6853 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6854 ins_encode %{ 6855 assert(UseAVX > 0, "required"); 6856 int vlen_enc = vector_length_encoding(this); 6857 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6858 %} 6859 ins_pipe( pipe_slow ); 6860 %} 6861 6862 // Floating point vector sqrt 6863 instruct vsqrtD_reg(vec dst, vec src) %{ 6864 match(Set dst (SqrtVD src)); 6865 format %{ "vsqrtpd $dst,$src\t! 
sqrt packedD" %} 6866 ins_encode %{ 6867 assert(UseAVX > 0, "required"); 6868 int vlen_enc = vector_length_encoding(this); 6869 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6870 %} 6871 ins_pipe( pipe_slow ); 6872 %} 6873 6874 instruct vsqrtD_mem(vec dst, memory mem) %{ 6875 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6876 match(Set dst (SqrtVD (LoadVector mem))); 6877 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6878 ins_encode %{ 6879 assert(UseAVX > 0, "required"); 6880 int vlen_enc = vector_length_encoding(this); 6881 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6882 %} 6883 ins_pipe( pipe_slow ); 6884 %} 6885 6886 // ------------------------------ Shift --------------------------------------- 6887 6888 // Left and right shift count vectors are the same on x86 6889 // (only lowest bits of xmm reg are used for count). 6890 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6891 match(Set dst (LShiftCntV cnt)); 6892 match(Set dst (RShiftCntV cnt)); 6893 format %{ "movdl $dst,$cnt\t! load shift count" %} 6894 ins_encode %{ 6895 __ movdl($dst$$XMMRegister, $cnt$$Register); 6896 %} 6897 ins_pipe( pipe_slow ); 6898 %} 6899 6900 // Byte vector shift 6901 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6902 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6903 match(Set dst ( LShiftVB src shift)); 6904 match(Set dst ( RShiftVB src shift)); 6905 match(Set dst (URShiftVB src shift)); 6906 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6907 format %{"vector_byte_shift $dst,$src,$shift" %} 6908 ins_encode %{ 6909 assert(UseSSE > 3, "required"); 6910 int opcode = this->ideal_Opcode(); 6911 bool sign = (opcode != Op_URShiftVB); 6912 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6913 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6914 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6915 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6916 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6917 %} 6918 ins_pipe( pipe_slow ); 6919 %} 6920 6921 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6922 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6923 UseAVX <= 1); 6924 match(Set dst ( LShiftVB src shift)); 6925 match(Set dst ( RShiftVB src shift)); 6926 match(Set dst (URShiftVB src shift)); 6927 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6928 format %{"vector_byte_shift $dst,$src,$shift" %} 6929 ins_encode %{ 6930 assert(UseSSE > 3, "required"); 6931 int opcode = this->ideal_Opcode(); 6932 bool sign = (opcode != Op_URShiftVB); 6933 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6934 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6935 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6936 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6937 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6938 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6939 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6940 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6941 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6942 %} 6943 ins_pipe( pipe_slow ); 6944 %} 6945 6946 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6947 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6948 UseAVX > 1); 6949 match(Set dst ( LShiftVB src shift)); 6950 match(Set dst ( RShiftVB src shift)); 6951 match(Set 
dst (URShiftVB src shift)); 6952 effect(TEMP dst, TEMP tmp); 6953 format %{"vector_byte_shift $dst,$src,$shift" %} 6954 ins_encode %{ 6955 int opcode = this->ideal_Opcode(); 6956 bool sign = (opcode != Op_URShiftVB); 6957 int vlen_enc = Assembler::AVX_256bit; 6958 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6959 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6960 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6961 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6962 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6963 %} 6964 ins_pipe( pipe_slow ); 6965 %} 6966 6967 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6968 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6969 match(Set dst ( LShiftVB src shift)); 6970 match(Set dst ( RShiftVB src shift)); 6971 match(Set dst (URShiftVB src shift)); 6972 effect(TEMP dst, TEMP tmp); 6973 format %{"vector_byte_shift $dst,$src,$shift" %} 6974 ins_encode %{ 6975 assert(UseAVX > 1, "required"); 6976 int opcode = this->ideal_Opcode(); 6977 bool sign = (opcode != Op_URShiftVB); 6978 int vlen_enc = Assembler::AVX_256bit; 6979 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6980 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6981 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6982 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6983 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6984 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6985 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6986 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6987 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6988 %} 6989 ins_pipe( pipe_slow ); 6990 %} 6991 6992 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6993 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6994 match(Set dst ( LShiftVB src shift)); 6995 match(Set dst (RShiftVB src shift)); 6996 match(Set dst (URShiftVB src shift)); 6997 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6998 format %{"vector_byte_shift $dst,$src,$shift" %} 6999 ins_encode %{ 7000 assert(UseAVX > 2, "required"); 7001 int opcode = this->ideal_Opcode(); 7002 bool sign = (opcode != Op_URShiftVB); 7003 int vlen_enc = Assembler::AVX_512bit; 7004 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 7005 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 7006 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7007 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7008 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7009 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 7010 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7011 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7012 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7013 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 7014 __ evmovdquq($tmp2$$XMMRegister, 
ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into ints
// (with sign extension) before the shift. Char vectors are fine since
// chars are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      // SSE fallback: copy only as many elements as the vector holds, then shift in place.
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 7089 ins_encode %{ 7090 int opcode = this->ideal_Opcode(); 7091 if (UseAVX > 0) { 7092 int vector_len = vector_length_encoding(this); 7093 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7094 } else { 7095 int vlen = Matcher::vector_length(this); 7096 if (vlen == 2) { 7097 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 7098 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7099 } else { 7100 assert(vlen == 4, "sanity"); 7101 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7102 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7103 } 7104 } 7105 %} 7106 ins_pipe( pipe_slow ); 7107 %} 7108 7109 // Longs vector shift 7110 instruct vshiftL(vec dst, vec src, vec shift) %{ 7111 predicate(!n->as_ShiftV()->is_var_shift()); 7112 match(Set dst ( LShiftVL src shift)); 7113 match(Set dst (URShiftVL src shift)); 7114 effect(TEMP dst, USE src, USE shift); 7115 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 7116 ins_encode %{ 7117 int opcode = this->ideal_Opcode(); 7118 if (UseAVX > 0) { 7119 int vlen_enc = vector_length_encoding(this); 7120 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7121 } else { 7122 assert(Matcher::vector_length(this) == 2, ""); 7123 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7124 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 7125 } 7126 %} 7127 ins_pipe( pipe_slow ); 7128 %} 7129 7130 // Longs vector constant shift 7131 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 7132 match(Set dst (LShiftVL src (LShiftCntV shift))); 7133 match(Set dst (URShiftVL src (RShiftCntV shift))); 7134 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 7135 ins_encode %{ 7136 int opcode = this->ideal_Opcode(); 7137 if (UseAVX > 0) { 7138 int vector_len = vector_length_encoding(this); 7139 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 7140 } else { 7141 assert(Matcher::vector_length(this) == 2, ""); 7142 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7143 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 7144 } 7145 %} 7146 ins_pipe( pipe_slow ); 7147 %} 7148 7149 // -------------------ArithmeticRightShift ----------------------------------- 7150 // Long vector arithmetic right shift 7151 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 7152 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 7153 match(Set dst (RShiftVL src shift)); 7154 effect(TEMP dst, TEMP tmp); 7155 format %{ "vshiftq $dst,$src,$shift" %} 7156 ins_encode %{ 7157 uint vlen = Matcher::vector_length(this); 7158 if (vlen == 2) { 7159 assert(UseSSE >= 2, "required"); 7160 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 7161 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 7162 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7163 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 7164 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 7165 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 7166 } else { 7167 assert(vlen == 4, "sanity"); 7168 assert(UseAVX > 1, "required"); 7169 int vlen_enc = Assembler::AVX_256bit; 7170 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7171 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 7172 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7173 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7174 __ 
vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 7175 } 7176 %} 7177 ins_pipe( pipe_slow ); 7178 %} 7179 7180 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 7181 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 7182 match(Set dst (RShiftVL src shift)); 7183 format %{ "vshiftq $dst,$src,$shift" %} 7184 ins_encode %{ 7185 int vlen_enc = vector_length_encoding(this); 7186 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7187 %} 7188 ins_pipe( pipe_slow ); 7189 %} 7190 7191 // ------------------- Variable Shift ----------------------------- 7192 // Byte variable shift 7193 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7194 predicate(Matcher::vector_length(n) <= 8 && 7195 n->as_ShiftV()->is_var_shift() && 7196 !VM_Version::supports_avx512bw()); 7197 match(Set dst ( LShiftVB src shift)); 7198 match(Set dst ( RShiftVB src shift)); 7199 match(Set dst (URShiftVB src shift)); 7200 effect(TEMP dst, TEMP vtmp); 7201 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7202 ins_encode %{ 7203 assert(UseAVX >= 2, "required"); 7204 7205 int opcode = this->ideal_Opcode(); 7206 int vlen_enc = Assembler::AVX_128bit; 7207 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7208 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7209 %} 7210 ins_pipe( pipe_slow ); 7211 %} 7212 7213 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7214 predicate(Matcher::vector_length(n) == 16 && 7215 n->as_ShiftV()->is_var_shift() && 7216 !VM_Version::supports_avx512bw()); 7217 match(Set dst ( LShiftVB src shift)); 7218 match(Set dst ( RShiftVB src shift)); 7219 match(Set dst (URShiftVB src shift)); 7220 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7221 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 as TEMP" %} 7222 ins_encode %{ 7223 assert(UseAVX >= 2, "required"); 7224 7225 int opcode = this->ideal_Opcode(); 7226 int vlen_enc = Assembler::AVX_128bit; 7227 // Shift lower half and get word result in dst 7228 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7229 7230 // Shift upper half and get word result in vtmp1 7231 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7232 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7233 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7234 7235 // Merge and down convert the two word results to byte in dst 7236 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7237 %} 7238 ins_pipe( pipe_slow ); 7239 %} 7240 7241 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7242 predicate(Matcher::vector_length(n) == 32 && 7243 n->as_ShiftV()->is_var_shift() && 7244 !VM_Version::supports_avx512bw()); 7245 match(Set dst ( LShiftVB src shift)); 7246 match(Set dst ( RShiftVB src shift)); 7247 match(Set dst (URShiftVB src shift)); 7248 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7249 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7250 ins_encode %{ 7251 assert(UseAVX >= 2, "required"); 7252 7253 int opcode = this->ideal_Opcode(); 7254 int vlen_enc = Assembler::AVX_128bit; 7255 // Process lower 128 bits and get result in dst 7256 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7257 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7258 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7259 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7260 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7261 7262 // Process higher 128 bits and get result in vtmp3 7263 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7264 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7265 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7266 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7267 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7268 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7269 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7270 7271 // Merge the two results in dst 7272 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7273 %} 7274 ins_pipe( pipe_slow ); 7275 %} 7276 7277 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7278 predicate(Matcher::vector_length(n) <= 32 && 7279 n->as_ShiftV()->is_var_shift() && 7280 VM_Version::supports_avx512bw()); 7281 match(Set dst ( LShiftVB src shift)); 7282 match(Set dst ( RShiftVB src shift)); 7283 match(Set dst (URShiftVB src shift)); 7284 effect(TEMP dst, TEMP vtmp); 7285 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp as TEMP" %} 7286 ins_encode %{ 7287 assert(UseAVX > 2, "required"); 7288 7289 int opcode = this->ideal_Opcode(); 7290 int vlen_enc = vector_length_encoding(this); 7291 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7292 %} 7293 ins_pipe( pipe_slow ); 7294 %} 7295 7296 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7297 predicate(Matcher::vector_length(n) == 64 && 7298 n->as_ShiftV()->is_var_shift() && 7299 VM_Version::supports_avx512bw()); 7300 match(Set dst ( LShiftVB src shift)); 7301 match(Set dst ( RShiftVB src shift)); 7302 match(Set dst (URShiftVB src shift)); 7303 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7304 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7305 ins_encode %{ 7306 assert(UseAVX > 2, "required"); 7307 7308 int opcode = this->ideal_Opcode(); 7309 int vlen_enc = Assembler::AVX_256bit; 7310 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7311 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7312 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7313 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7314 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7315 %} 7316 ins_pipe( pipe_slow ); 7317 %} 7318 7319 // Short variable shift 7320 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7321 predicate(Matcher::vector_length(n) <= 8 && 7322 n->as_ShiftV()->is_var_shift() && 7323 !VM_Version::supports_avx512bw()); 7324 match(Set dst ( LShiftVS src shift)); 7325 match(Set dst ( RShiftVS src shift)); 7326 match(Set dst (URShiftVS src shift)); 7327 effect(TEMP dst, TEMP vtmp); 7328 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7329 ins_encode %{ 7330 assert(UseAVX >= 2, "required"); 7331 7332 int opcode = this->ideal_Opcode(); 7333 bool sign = (opcode != Op_URShiftVS); 7334 int vlen_enc = Assembler::AVX_256bit; 7335 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7336 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7337 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7338 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7339 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7340 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7341 %} 7342 ins_pipe( pipe_slow ); 7343 %} 7344 7345 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7346 predicate(Matcher::vector_length(n) == 16 && 7347 n->as_ShiftV()->is_var_shift() && 7348 !VM_Version::supports_avx512bw()); 7349 match(Set dst ( LShiftVS src shift)); 7350 match(Set dst ( RShiftVS src shift)); 7351 match(Set dst (URShiftVS src shift)); 7352 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7353 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7354 ins_encode %{ 7355 assert(UseAVX >= 2, "required"); 7356 7357 int opcode = this->ideal_Opcode(); 7358 bool sign = (opcode != Op_URShiftVS); 7359 int vlen_enc = Assembler::AVX_256bit; 7360 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7361 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7362 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7363 __ 
varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7364 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7365 7366 // Shift upper half, with result in dst using vtmp1 as TEMP 7367 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7368 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7369 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7370 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7371 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7372 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7373 7374 // Merge lower and upper half result into dst 7375 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7376 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7377 %} 7378 ins_pipe( pipe_slow ); 7379 %} 7380 7381 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7382 predicate(n->as_ShiftV()->is_var_shift() && 7383 VM_Version::supports_avx512bw()); 7384 match(Set dst ( LShiftVS src shift)); 7385 match(Set dst ( RShiftVS src shift)); 7386 match(Set dst (URShiftVS src shift)); 7387 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7388 ins_encode %{ 7389 assert(UseAVX > 2, "required"); 7390 7391 int opcode = this->ideal_Opcode(); 7392 int vlen_enc = vector_length_encoding(this); 7393 if (!VM_Version::supports_avx512vl()) { 7394 vlen_enc = Assembler::AVX_512bit; 7395 } 7396 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7397 %} 7398 ins_pipe( pipe_slow ); 7399 %} 7400 7401 //Integer variable shift 7402 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7403 predicate(n->as_ShiftV()->is_var_shift()); 7404 match(Set dst ( LShiftVI src shift)); 7405 match(Set dst ( RShiftVI src shift)); 7406 match(Set dst (URShiftVI src shift)); 7407 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7408 ins_encode %{ 7409 assert(UseAVX >= 2, "required"); 7410 7411 int opcode = this->ideal_Opcode(); 7412 int vlen_enc = vector_length_encoding(this); 7413 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7414 %} 7415 ins_pipe( pipe_slow ); 7416 %} 7417 7418 //Long variable shift 7419 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7420 predicate(n->as_ShiftV()->is_var_shift()); 7421 match(Set dst ( LShiftVL src shift)); 7422 match(Set dst (URShiftVL src shift)); 7423 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7424 ins_encode %{ 7425 assert(UseAVX >= 2, "required"); 7426 7427 int opcode = this->ideal_Opcode(); 7428 int vlen_enc = vector_length_encoding(this); 7429 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7430 %} 7431 ins_pipe( pipe_slow ); 7432 %} 7433 7434 //Long variable right shift arithmetic 7435 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7436 predicate(Matcher::vector_length(n) <= 4 && 7437 n->as_ShiftV()->is_var_shift() && 7438 UseAVX == 2); 7439 match(Set dst (RShiftVL src shift)); 7440 effect(TEMP dst, TEMP vtmp); 7441 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 7442 ins_encode %{ 7443 int opcode = this->ideal_Opcode(); 7444 int vlen_enc = vector_length_encoding(this); 7445 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7446 $vtmp$$XMMRegister); 7447 %} 7448 ins_pipe( pipe_slow ); 7449 %} 7450 7451 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7452 predicate(n->as_ShiftV()->is_var_shift() && 7453 UseAVX > 2); 7454 match(Set dst (RShiftVL src shift)); 7455 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7456 ins_encode %{ 7457 int opcode = this->ideal_Opcode(); 7458 int vlen_enc = vector_length_encoding(this); 7459 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7460 %} 7461 ins_pipe( pipe_slow ); 7462 %} 7463 7464 // --------------------------------- AND -------------------------------------- 7465 7466 instruct vand(vec dst, vec src) %{ 7467 predicate(UseAVX == 0); 7468 match(Set dst (AndV dst src)); 7469 format %{ "pand $dst,$src\t! and vectors" %} 7470 ins_encode %{ 7471 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7472 %} 7473 ins_pipe( pipe_slow ); 7474 %} 7475 7476 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7477 predicate(UseAVX > 0); 7478 match(Set dst (AndV src1 src2)); 7479 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7480 ins_encode %{ 7481 int vlen_enc = vector_length_encoding(this); 7482 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7483 %} 7484 ins_pipe( pipe_slow ); 7485 %} 7486 7487 instruct vand_mem(vec dst, vec src, memory mem) %{ 7488 predicate((UseAVX > 0) && 7489 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7490 match(Set dst (AndV src (LoadVector mem))); 7491 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7492 ins_encode %{ 7493 int vlen_enc = vector_length_encoding(this); 7494 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7495 %} 7496 ins_pipe( pipe_slow ); 7497 %} 7498 7499 // --------------------------------- OR --------------------------------------- 7500 7501 instruct vor(vec dst, vec src) %{ 7502 predicate(UseAVX == 0); 7503 match(Set dst (OrV dst src)); 7504 format %{ "por $dst,$src\t! or vectors" %} 7505 ins_encode %{ 7506 __ por($dst$$XMMRegister, $src$$XMMRegister); 7507 %} 7508 ins_pipe( pipe_slow ); 7509 %} 7510 7511 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7512 predicate(UseAVX > 0); 7513 match(Set dst (OrV src1 src2)); 7514 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 7515 ins_encode %{ 7516 int vlen_enc = vector_length_encoding(this); 7517 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7518 %} 7519 ins_pipe( pipe_slow ); 7520 %} 7521 7522 instruct vor_mem(vec dst, vec src, memory mem) %{ 7523 predicate((UseAVX > 0) && 7524 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7525 match(Set dst (OrV src (LoadVector mem))); 7526 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7527 ins_encode %{ 7528 int vlen_enc = vector_length_encoding(this); 7529 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7530 %} 7531 ins_pipe( pipe_slow ); 7532 %} 7533 7534 // --------------------------------- XOR -------------------------------------- 7535 7536 instruct vxor(vec dst, vec src) %{ 7537 predicate(UseAVX == 0); 7538 match(Set dst (XorV dst src)); 7539 format %{ "pxor $dst,$src\t! 
xor vectors" %} 7540 ins_encode %{ 7541 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7542 %} 7543 ins_pipe( pipe_slow ); 7544 %} 7545 7546 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7547 predicate(UseAVX > 0); 7548 match(Set dst (XorV src1 src2)); 7549 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7550 ins_encode %{ 7551 int vlen_enc = vector_length_encoding(this); 7552 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7553 %} 7554 ins_pipe( pipe_slow ); 7555 %} 7556 7557 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7558 predicate((UseAVX > 0) && 7559 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7560 match(Set dst (XorV src (LoadVector mem))); 7561 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7562 ins_encode %{ 7563 int vlen_enc = vector_length_encoding(this); 7564 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7565 %} 7566 ins_pipe( pipe_slow ); 7567 %} 7568 7569 // --------------------------------- VectorCast -------------------------------------- 7570 7571 instruct vcastBtoX(vec dst, vec src) %{ 7572 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7573 match(Set dst (VectorCastB2X src)); 7574 format %{ "vector_cast_b2x $dst,$src\t!" %} 7575 ins_encode %{ 7576 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7577 int vlen_enc = vector_length_encoding(this); 7578 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7579 %} 7580 ins_pipe( pipe_slow ); 7581 %} 7582 7583 instruct vcastBtoD(legVec dst, legVec src) %{ 7584 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7585 match(Set dst (VectorCastB2X src)); 7586 format %{ "vector_cast_b2x $dst,$src\t!" %} 7587 ins_encode %{ 7588 int vlen_enc = vector_length_encoding(this); 7589 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7590 %} 7591 ins_pipe( pipe_slow ); 7592 %} 7593 7594 instruct castStoX(vec dst, vec src) %{ 7595 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7596 Matcher::vector_length(n->in(1)) <= 8 && // src 7597 Matcher::vector_element_basic_type(n) == T_BYTE); 7598 match(Set dst (VectorCastS2X src)); 7599 format %{ "vector_cast_s2x $dst,$src" %} 7600 ins_encode %{ 7601 assert(UseAVX > 0, "required"); 7602 7603 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7604 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7605 %} 7606 ins_pipe( pipe_slow ); 7607 %} 7608 7609 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7610 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7611 Matcher::vector_length(n->in(1)) == 16 && // src 7612 Matcher::vector_element_basic_type(n) == T_BYTE); 7613 effect(TEMP dst, TEMP vtmp); 7614 match(Set dst (VectorCastS2X src)); 7615 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7616 ins_encode %{ 7617 assert(UseAVX > 0, "required"); 7618 7619 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7620 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7621 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7622 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7623 %} 7624 ins_pipe( pipe_slow ); 7625 %} 7626 7627 instruct vcastStoX_evex(vec dst, vec src) %{ 7628 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7629 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7630 match(Set dst (VectorCastS2X src)); 7631 format %{ "vector_cast_s2x $dst,$src\t!" %} 7632 ins_encode %{ 7633 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7634 int src_vlen_enc = vector_length_encoding(this, $src); 7635 int vlen_enc = vector_length_encoding(this); 7636 switch (to_elem_bt) { 7637 case T_BYTE: 7638 if (!VM_Version::supports_avx512vl()) { 7639 vlen_enc = Assembler::AVX_512bit; 7640 } 7641 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7642 break; 7643 case T_INT: 7644 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7645 break; 7646 case T_FLOAT: 7647 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7648 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7649 break; 7650 case T_LONG: 7651 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7652 break; 7653 case T_DOUBLE: { 7654 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7655 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7656 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7657 break; 7658 } 7659 default: 7660 ShouldNotReachHere(); 7661 } 7662 %} 7663 ins_pipe( pipe_slow ); 7664 %} 7665 7666 instruct castItoX(vec dst, vec src) %{ 7667 predicate(UseAVX <= 2 && 7668 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7669 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7670 match(Set dst (VectorCastI2X src)); 7671 format %{ "vector_cast_i2x $dst,$src" %} 7672 ins_encode %{ 7673 assert(UseAVX > 0, "required"); 7674 7675 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7676 int vlen_enc = vector_length_encoding(this, $src); 7677 7678 if (to_elem_bt == T_BYTE) { 7679 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7680 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7681 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7682 } else { 7683 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7684 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7685 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7686 } 7687 %} 7688 ins_pipe( pipe_slow ); 7689 %} 7690 7691 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7692 predicate(UseAVX <= 2 && 7693 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7694 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7695 match(Set dst (VectorCastI2X src)); 7696 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7697 effect(TEMP dst, TEMP vtmp); 7698 ins_encode %{ 7699 assert(UseAVX > 0, "required"); 7700 7701 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7702 int vlen_enc = vector_length_encoding(this, $src); 7703 7704 if (to_elem_bt == T_BYTE) { 7705 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7706 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7707 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7708 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7709 } else { 7710 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7711 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7712 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7713 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7714 } 7715 %} 7716 ins_pipe( pipe_slow ); 7717 %} 7718 7719 instruct vcastItoX_evex(vec dst, vec src) %{ 7720 predicate(UseAVX > 2 || 7721 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7722 match(Set dst (VectorCastI2X src)); 7723 format %{ "vector_cast_i2x $dst,$src\t!" %} 7724 ins_encode %{ 7725 assert(UseAVX > 0, "required"); 7726 7727 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7728 int src_vlen_enc = vector_length_encoding(this, $src); 7729 int dst_vlen_enc = vector_length_encoding(this); 7730 switch (dst_elem_bt) { 7731 case T_BYTE: 7732 if (!VM_Version::supports_avx512vl()) { 7733 src_vlen_enc = Assembler::AVX_512bit; 7734 } 7735 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7736 break; 7737 case T_SHORT: 7738 if (!VM_Version::supports_avx512vl()) { 7739 src_vlen_enc = Assembler::AVX_512bit; 7740 } 7741 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7742 break; 7743 case T_FLOAT: 7744 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7745 break; 7746 case T_LONG: 7747 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7748 break; 7749 case T_DOUBLE: 7750 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7751 break; 7752 default: 7753 ShouldNotReachHere(); 7754 } 7755 %} 7756 ins_pipe( pipe_slow ); 7757 %} 7758 7759 instruct vcastLtoBS(vec dst, vec src) %{ 7760 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7761 UseAVX <= 2); 7762 match(Set dst (VectorCastL2X src)); 7763 format %{ "vector_cast_l2x $dst,$src" %} 7764 ins_encode %{ 7765 assert(UseAVX > 0, "required"); 7766 7767 int vlen = Matcher::vector_length_in_bytes(this, $src); 7768 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7769 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7770 : ExternalAddress(vector_int_to_short_mask()); 7771 if (vlen <= 16) { 7772 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7773 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7774 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7775 } else { 7776 assert(vlen <= 32, "required"); 7777 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7778 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7779 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7780 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7781 } 7782 if (to_elem_bt == T_BYTE) { 7783 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7784 } 7785 %} 7786 ins_pipe( pipe_slow ); 7787 %} 7788 7789 instruct vcastLtoX_evex(vec dst, vec src) %{ 7790 predicate(UseAVX > 2 || 7791 (Matcher::vector_element_basic_type(n) == T_INT || 7792 Matcher::vector_element_basic_type(n) == T_FLOAT || 7793 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7794 match(Set dst (VectorCastL2X src)); 7795 format %{ "vector_cast_l2x $dst,$src\t!" %} 7796 ins_encode %{ 7797 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7798 int vlen = Matcher::vector_length_in_bytes(this, $src); 7799 int vlen_enc = vector_length_encoding(this, $src); 7800 switch (to_elem_bt) { 7801 case T_BYTE: 7802 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7803 vlen_enc = Assembler::AVX_512bit; 7804 } 7805 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7806 break; 7807 case T_SHORT: 7808 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7809 vlen_enc = Assembler::AVX_512bit; 7810 } 7811 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7812 break; 7813 case T_INT: 7814 if (vlen == 8) { 7815 if ($dst$$XMMRegister != $src$$XMMRegister) { 7816 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7817 } 7818 } else if (vlen == 16) { 7819 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7820 } else if (vlen == 32) { 7821 if (UseAVX > 2) { 7822 if (!VM_Version::supports_avx512vl()) { 7823 vlen_enc = Assembler::AVX_512bit; 7824 } 7825 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7826 } else { 7827 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7828 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7829 } 7830 } else { // vlen == 64 7831 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7832 } 7833 break; 7834 case T_FLOAT: 7835 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7836 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7837 break; 7838 case T_DOUBLE: 7839 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7840 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7841 break; 7842 7843 default: assert(false, "%s", type2name(to_elem_bt)); 7844 } 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vcastFtoD_reg(vec dst, vec src) %{ 7850 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7851 match(Set dst (VectorCastF2X src)); 7852 format %{ "vector_cast_f2d $dst,$src\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the explicit scratch register that was needed to load addresses wider
    // than 32 bits for register-indirect addressing: stub constants live in the code cache, and
    // ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise that limit, but
    // a code cache larger than 2G is unrealistic in practice. On the other hand, with the current
    // cap we save a temporary register allocation, which in the limiting case can prevent
    // spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!"
%} 7911 ins_encode %{ 7912 int vlen_enc = vector_length_encoding(this, $src); 7913 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7914 %} 7915 ins_pipe( pipe_slow ); 7916 %} 7917 7918 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7919 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7920 is_integral_type(Matcher::vector_element_basic_type(n))); 7921 match(Set dst (VectorCastD2X src)); 7922 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7923 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7924 ins_encode %{ 7925 int vlen_enc = vector_length_encoding(this, $src); 7926 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7927 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7928 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7929 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7930 %} 7931 ins_pipe( pipe_slow ); 7932 %} 7933 7934 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7935 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7936 is_integral_type(Matcher::vector_element_basic_type(n))); 7937 match(Set dst (VectorCastD2X src)); 7938 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7939 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7940 ins_encode %{ 7941 int vlen_enc = vector_length_encoding(this, $src); 7942 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7943 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7944 ExternalAddress(vector_float_signflip()); 7945 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7946 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7947 %} 7948 ins_pipe( pipe_slow ); 7949 %} 7950 7951 instruct vucast(vec dst, vec src) %{ 7952 match(Set dst (VectorUCastB2X src)); 7953 match(Set dst (VectorUCastS2X src)); 7954 match(Set dst (VectorUCastI2X src)); 7955 format %{ "vector_ucast $dst,$src\t!" %} 7956 ins_encode %{ 7957 assert(UseAVX > 0, "required"); 7958 7959 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7960 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7961 int vlen_enc = vector_length_encoding(this); 7962 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7963 %} 7964 ins_pipe( pipe_slow ); 7965 %} 7966 7967 #ifdef _LP64 7968 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7969 predicate(!VM_Version::supports_avx512vl() && 7970 Matcher::vector_length_in_bytes(n) < 64 && 7971 Matcher::vector_element_basic_type(n) == T_INT); 7972 match(Set dst (RoundVF src)); 7973 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7974 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7975 ins_encode %{ 7976 int vlen_enc = vector_length_encoding(this); 7977 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7978 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7979 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7980 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7981 %} 7982 ins_pipe( pipe_slow ); 7983 %} 7984 7985 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7986 predicate((VM_Version::supports_avx512vl() || 7987 Matcher::vector_length_in_bytes(n) == 64) && 7988 Matcher::vector_element_basic_type(n) == T_INT); 7989 match(Set dst (RoundVF src)); 7990 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7991 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7992 ins_encode %{ 7993 int vlen_enc = vector_length_encoding(this); 7994 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7995 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7996 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7997 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7998 %} 7999 ins_pipe( pipe_slow ); 8000 %} 8001 8002 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8003 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 8004 match(Set dst (RoundVD src)); 8005 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 8006 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 8007 ins_encode %{ 8008 int vlen_enc = vector_length_encoding(this); 8009 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 8010 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 8011 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 8012 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 8013 %} 8014 ins_pipe( pipe_slow ); 8015 %} 8016 8017 #endif // _LP64 8018 8019 // --------------------------------- VectorMaskCmp -------------------------------------- 8020 8021 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8022 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8023 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 8024 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8025 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8026 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8027 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 8028 ins_encode %{ 8029 int vlen_enc = vector_length_encoding(this, $src1); 8030 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8031 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8032 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8033 } else { 8034 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8035 } 8036 %} 8037 ins_pipe( pipe_slow ); 8038 %} 8039 8040 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 8041 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 8042 n->bottom_type()->isa_vectmask() == nullptr && 8043 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8044 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8045 effect(TEMP ktmp); 8046 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 8047 ins_encode %{ 8048 int vlen_enc = Assembler::AVX_512bit; 8049 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8050 KRegister mask = k0; // The comparison itself is not being masked. 8051 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8052 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8053 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8054 } else { 8055 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8056 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 8057 } 8058 %} 8059 ins_pipe( pipe_slow ); 8060 %} 8061 8062 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 8063 predicate(n->bottom_type()->isa_vectmask() && 8064 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 8065 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8066 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 8067 ins_encode %{ 8068 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8069 int vlen_enc = vector_length_encoding(this, $src1); 8070 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 8071 KRegister mask = k0; // The comparison itself is not being masked. 8072 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 8073 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8074 } else { 8075 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 8076 } 8077 %} 8078 ins_pipe( pipe_slow ); 8079 %} 8080 8081 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 8082 predicate(n->bottom_type()->isa_vectmask() == nullptr && 8083 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 8084 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 8085 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 8086 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 8087 (n->in(2)->get_int() == BoolTest::eq || 8088 n->in(2)->get_int() == BoolTest::lt || 8089 n->in(2)->get_int() == BoolTest::gt)); // cond 8090 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 8091 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
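// Unsigned integral compares on vectors up to 32 bytes have no direct SSE/AVX2 unsigned
// compare instruction. They are emulated below by flipping the sign bit of both operands
// (XOR with a broadcast per-element sign-bit pattern) and then issuing the equivalent
// signed compare.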
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!"
%} 8189 ins_encode %{ 8190 assert(UseAVX > 2, "required"); 8191 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 8192 8193 int vlen_enc = vector_length_encoding(this, $src1); 8194 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 8195 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 8196 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 8197 8198 // Comparison i 8199 switch (src1_elem_bt) { 8200 case T_BYTE: { 8201 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8202 break; 8203 } 8204 case T_SHORT: { 8205 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8206 break; 8207 } 8208 case T_INT: { 8209 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8210 break; 8211 } 8212 case T_LONG: { 8213 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8214 break; 8215 } 8216 default: assert(false, "%s", type2name(src1_elem_bt)); 8217 } 8218 %} 8219 ins_pipe( pipe_slow ); 8220 %} 8221 8222 // Extract 8223 8224 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8225 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8226 match(Set dst (ExtractI src idx)); 8227 match(Set dst (ExtractS src idx)); 8228 #ifdef _LP64 8229 match(Set dst (ExtractB src idx)); 8230 #endif 8231 format %{ "extractI $dst,$src,$idx\t!" %} 8232 ins_encode %{ 8233 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8234 8235 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8236 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8237 %} 8238 ins_pipe( pipe_slow ); 8239 %} 8240 8241 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8242 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8243 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8244 match(Set dst (ExtractI src idx)); 8245 match(Set dst (ExtractS src idx)); 8246 #ifdef _LP64 8247 match(Set dst (ExtractB src idx)); 8248 #endif 8249 effect(TEMP vtmp); 8250 format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %} 8251 ins_encode %{ 8252 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8253 8254 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8255 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8256 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8257 %} 8258 ins_pipe( pipe_slow ); 8259 %} 8260 8261 #ifdef _LP64 8262 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8263 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8264 match(Set dst (ExtractL src idx)); 8265 format %{ "extractL $dst,$src,$idx\t!" %} 8266 ins_encode %{ 8267 assert(UseSSE >= 4, "required"); 8268 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8269 8270 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8271 %} 8272 ins_pipe( pipe_slow ); 8273 %} 8274 8275 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8276 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8277 Matcher::vector_length(n->in(1)) == 8); // src 8278 match(Set dst (ExtractL src idx)); 8279 effect(TEMP vtmp); 8280 format %{ "vextractL $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8281 ins_encode %{ 8282 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8283 8284 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8285 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8286 %} 8287 ins_pipe( pipe_slow ); 8288 %} 8289 #endif 8290 8291 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8292 predicate(Matcher::vector_length(n->in(1)) <= 4); 8293 match(Set dst (ExtractF src idx)); 8294 effect(TEMP dst, TEMP vtmp); 8295 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8296 ins_encode %{ 8297 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8298 8299 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8300 %} 8301 ins_pipe( pipe_slow ); 8302 %} 8303 8304 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8305 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8306 Matcher::vector_length(n->in(1)/*src*/) == 16); 8307 match(Set dst (ExtractF src idx)); 8308 effect(TEMP vtmp); 8309 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8310 ins_encode %{ 8311 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8312 8313 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8314 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8315 %} 8316 ins_pipe( pipe_slow ); 8317 %} 8318 8319 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8320 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8321 match(Set dst (ExtractD src idx)); 8322 format %{ "extractD $dst,$src,$idx\t!" %} 8323 ins_encode %{ 8324 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8325 8326 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8327 %} 8328 ins_pipe( pipe_slow ); 8329 %} 8330 8331 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8332 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8333 Matcher::vector_length(n->in(1)) == 8); // src 8334 match(Set dst (ExtractD src idx)); 8335 effect(TEMP vtmp); 8336 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 8337 ins_encode %{ 8338 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8339 8340 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8341 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8342 %} 8343 ins_pipe( pipe_slow ); 8344 %} 8345 8346 // --------------------------------- Vector Blend -------------------------------------- 8347 8348 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8349 predicate(UseAVX == 0); 8350 match(Set dst (VectorBlend (Binary dst src) mask)); 8351 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 8352 effect(TEMP tmp); 8353 ins_encode %{ 8354 assert(UseSSE >= 4, "required"); 8355 8356 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8357 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8358 } 8359 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8360 %} 8361 ins_pipe( pipe_slow ); 8362 %} 8363 8364 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8365 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8366 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8367 Matcher::vector_length_in_bytes(n) <= 32 && 8368 is_integral_type(Matcher::vector_element_basic_type(n))); 8369 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8370 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8371 ins_encode %{ 8372 int vlen_enc = vector_length_encoding(this); 8373 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8374 %} 8375 ins_pipe( pipe_slow ); 8376 %} 8377 8378 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8379 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8380 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8381 Matcher::vector_length_in_bytes(n) <= 32 && 8382 !is_integral_type(Matcher::vector_element_basic_type(n))); 8383 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8384 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8385 ins_encode %{ 8386 int vlen_enc = vector_length_encoding(this); 8387 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8388 %} 8389 ins_pipe( pipe_slow ); 8390 %} 8391 8392 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8393 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8394 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8395 Matcher::vector_length_in_bytes(n) <= 32); 8396 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8397 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8398 effect(TEMP vtmp, TEMP dst); 8399 ins_encode %{ 8400 int vlen_enc = vector_length_encoding(this); 8401 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8402 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8403 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8404 %} 8405 ins_pipe( pipe_slow ); 8406 %} 8407 8408 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8409 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8410 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8411 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8412 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8413 effect(TEMP ktmp); 8414 ins_encode %{ 8415 int vlen_enc = Assembler::AVX_512bit; 8416 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8417 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8418 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8419 %} 8420 ins_pipe( pipe_slow ); 8421 %} 8422 8423 8424 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8425 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8426 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8427 VM_Version::supports_avx512bw())); 8428 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8429 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8430 ins_encode %{ 8431 int vlen_enc = vector_length_encoding(this); 8432 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8433 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8434 %} 8435 ins_pipe( pipe_slow ); 8436 %} 8437 8438 // --------------------------------- ABS -------------------------------------- 8439 // a = |a| 8440 instruct vabsB_reg(vec dst, vec src) %{ 8441 match(Set dst (AbsVB src)); 8442 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8443 ins_encode %{ 8444 uint vlen = Matcher::vector_length(this); 8445 if (vlen <= 16) { 8446 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8447 } else { 8448 int vlen_enc = vector_length_encoding(this); 8449 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8450 } 8451 %} 8452 ins_pipe( pipe_slow ); 8453 %} 8454 8455 instruct vabsS_reg(vec dst, vec src) %{ 8456 match(Set dst (AbsVS src)); 8457 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8458 ins_encode %{ 8459 uint vlen = Matcher::vector_length(this); 8460 if (vlen <= 8) { 8461 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8462 } else { 8463 int vlen_enc = vector_length_encoding(this); 8464 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8465 } 8466 %} 8467 ins_pipe( pipe_slow ); 8468 %} 8469 8470 instruct vabsI_reg(vec dst, vec src) %{ 8471 match(Set dst (AbsVI src)); 8472 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8473 ins_encode %{ 8474 uint vlen = Matcher::vector_length(this); 8475 if (vlen <= 4) { 8476 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8477 } else { 8478 int vlen_enc = vector_length_encoding(this); 8479 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8480 } 8481 %} 8482 ins_pipe( pipe_slow ); 8483 %} 8484 8485 instruct vabsL_reg(vec dst, vec src) %{ 8486 match(Set dst (AbsVL src)); 8487 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8488 ins_encode %{ 8489 assert(UseAVX > 2, "required"); 8490 int vlen_enc = vector_length_encoding(this); 8491 if (!VM_Version::supports_avx512vl()) { 8492 vlen_enc = Assembler::AVX_512bit; 8493 } 8494 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8495 %} 8496 ins_pipe( pipe_slow ); 8497 %} 8498 8499 // --------------------------------- ABSNEG -------------------------------------- 8500 8501 instruct vabsnegF(vec dst, vec src) %{ 8502 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8503 match(Set dst (AbsVF src)); 8504 match(Set dst (NegVF src)); 8505 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8506 ins_cost(150); 8507 ins_encode %{ 8508 int opcode = 
this->ideal_Opcode(); 8509 int vlen = Matcher::vector_length(this); 8510 if (vlen == 2) { 8511 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8512 } else { 8513 assert(vlen == 8 || vlen == 16, "required"); 8514 int vlen_enc = vector_length_encoding(this); 8515 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8516 } 8517 %} 8518 ins_pipe( pipe_slow ); 8519 %} 8520 8521 instruct vabsneg4F(vec dst) %{ 8522 predicate(Matcher::vector_length(n) == 4); 8523 match(Set dst (AbsVF dst)); 8524 match(Set dst (NegVF dst)); 8525 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8526 ins_cost(150); 8527 ins_encode %{ 8528 int opcode = this->ideal_Opcode(); 8529 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8530 %} 8531 ins_pipe( pipe_slow ); 8532 %} 8533 8534 instruct vabsnegD(vec dst, vec src) %{ 8535 match(Set dst (AbsVD src)); 8536 match(Set dst (NegVD src)); 8537 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8538 ins_encode %{ 8539 int opcode = this->ideal_Opcode(); 8540 uint vlen = Matcher::vector_length(this); 8541 if (vlen == 2) { 8542 assert(UseSSE >= 2, "required"); 8543 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8544 } else { 8545 int vlen_enc = vector_length_encoding(this); 8546 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8547 } 8548 %} 8549 ins_pipe( pipe_slow ); 8550 %} 8551 8552 //------------------------------------- VectorTest -------------------------------------------- 8553 8554 #ifdef _LP64 8555 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8556 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8557 match(Set cr (VectorTest src1 src2)); 8558 effect(TEMP vtmp); 8559 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8560 ins_encode %{ 8561 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8562 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8563 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8569 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8570 match(Set cr (VectorTest src1 src2)); 8571 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8572 ins_encode %{ 8573 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8574 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8575 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8576 %} 8577 ins_pipe( pipe_slow ); 8578 %} 8579 8580 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8581 predicate((Matcher::vector_length(n->in(1)) < 8 || 8582 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8583 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8584 match(Set cr (VectorTest src1 src2)); 8585 effect(TEMP tmp); 8586 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8587 ins_encode %{ 8588 uint masklen = Matcher::vector_length(this, $src1); 8589 __ kmovwl($tmp$$Register, $src1$$KRegister); 8590 __ andl($tmp$$Register, (1 << masklen) - 1); 8591 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8592 %} 8593 ins_pipe( pipe_slow ); 8594 %} 8595 8596 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8597 predicate((Matcher::vector_length(n->in(1)) < 8 || 8598 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8599 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8600 match(Set cr (VectorTest src1 src2)); 8601 effect(TEMP tmp); 8602 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8603 ins_encode %{ 8604 uint masklen = Matcher::vector_length(this, $src1); 8605 __ kmovwl($tmp$$Register, $src1$$KRegister); 8606 __ andl($tmp$$Register, (1 << masklen) - 1); 8607 %} 8608 ins_pipe( pipe_slow ); 8609 %} 8610 8611 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8612 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8613 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8614 match(Set cr (VectorTest src1 src2)); 8615 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8616 ins_encode %{ 8617 uint masklen = Matcher::vector_length(this, $src1); 8618 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8619 %} 8620 ins_pipe( pipe_slow ); 8621 %} 8622 #endif 8623 8624 //------------------------------------- LoadMask -------------------------------------------- 8625 8626 instruct loadMask(legVec dst, legVec src) %{ 8627 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8628 match(Set dst (VectorLoadMask src)); 8629 effect(TEMP dst); 8630 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8631 ins_encode %{ 8632 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8633 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8634 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8635 %} 8636 ins_pipe( pipe_slow ); 8637 %} 8638 8639 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8640 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8641 match(Set dst (VectorLoadMask src)); 8642 effect(TEMP xtmp); 8643 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8644 ins_encode %{ 8645 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8646 true, Assembler::AVX_512bit); 8647 %} 8648 ins_pipe( pipe_slow ); 8649 %} 8650 8651 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8652 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8653 match(Set dst (VectorLoadMask src)); 8654 effect(TEMP xtmp); 8655 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8656 ins_encode %{ 8657 int vlen_enc = vector_length_encoding(in(1)); 8658 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8659 false, vlen_enc); 8660 %} 8661 ins_pipe( pipe_slow ); 8662 %} 8663 8664 //------------------------------------- StoreMask -------------------------------------------- 8665 8666 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8667 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8668 match(Set dst (VectorStoreMask src size)); 8669 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8670 ins_encode %{ 8671 int vlen = Matcher::vector_length(this); 8672 if (vlen <= 16 && UseAVX <= 2) { 8673 assert(UseSSE >= 3, "required"); 8674 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8675 } else { 8676 assert(UseAVX > 0, "required"); 8677 int src_vlen_enc = vector_length_encoding(this, $src); 8678 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8679 } 8680 %} 8681 ins_pipe( pipe_slow ); 8682 %} 8683 8684 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8685 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8686 match(Set dst (VectorStoreMask src size)); 8687 effect(TEMP_DEF dst, TEMP xtmp); 8688 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8689 ins_encode %{ 8690 int vlen_enc = Assembler::AVX_128bit; 8691 int vlen = Matcher::vector_length(this); 8692 if (vlen <= 8) { 8693 assert(UseSSE >= 3, "required"); 8694 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8695 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8696 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8697 } else { 8698 assert(UseAVX > 0, "required"); 8699 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8700 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8701 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8702 } 8703 %} 8704 ins_pipe( pipe_slow ); 8705 %} 8706 8707 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8708 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8709 match(Set dst (VectorStoreMask src size)); 8710 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8711 effect(TEMP_DEF dst, TEMP xtmp); 8712 ins_encode %{ 8713 int vlen_enc = Assembler::AVX_128bit; 8714 int vlen = Matcher::vector_length(this); 8715 if (vlen <= 4) { 8716 assert(UseSSE >= 3, "required"); 8717 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8718 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8719 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8720 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8721 } else { 8722 assert(UseAVX > 0, "required"); 8723 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8724 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8725 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8726 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8727 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8728 } 8729 %} 8730 ins_pipe( pipe_slow ); 8731 %} 8732 8733 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8734 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8735 match(Set dst (VectorStoreMask src size)); 8736 effect(TEMP_DEF dst, TEMP xtmp); 8737 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8738 ins_encode %{ 8739 assert(UseSSE >= 3, "required"); 8740 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8741 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8742 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8743 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8744 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8745 %} 8746 ins_pipe( pipe_slow ); 8747 %} 8748 8749 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8750 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8751 match(Set dst (VectorStoreMask src size)); 8752 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8753 effect(TEMP_DEF dst, TEMP vtmp); 8754 ins_encode %{ 8755 int vlen_enc = Assembler::AVX_128bit; 8756 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8757 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8758 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8759 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8760 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8761 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8762 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8763 %} 8764 ins_pipe( pipe_slow ); 8765 %} 8766 8767 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8768 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8769 match(Set dst (VectorStoreMask src size)); 8770 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8771 ins_encode %{ 8772 int src_vlen_enc = vector_length_encoding(this, $src); 8773 int dst_vlen_enc = vector_length_encoding(this); 8774 if (!VM_Version::supports_avx512vl()) { 8775 src_vlen_enc = Assembler::AVX_512bit; 8776 } 8777 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8778 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8779 %} 8780 ins_pipe( pipe_slow ); 8781 %} 8782 8783 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8784 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8785 match(Set dst (VectorStoreMask src size)); 8786 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8787 ins_encode %{ 8788 int src_vlen_enc = vector_length_encoding(this, $src); 8789 int dst_vlen_enc = vector_length_encoding(this); 8790 if (!VM_Version::supports_avx512vl()) { 8791 src_vlen_enc = Assembler::AVX_512bit; 8792 } 8793 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8794 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8795 %} 8796 ins_pipe( pipe_slow ); 8797 %} 8798 8799 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8800 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8801 match(Set dst (VectorStoreMask mask size)); 8802 effect(TEMP_DEF dst); 8803 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8804 ins_encode %{ 8805 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8806 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8807 false, Assembler::AVX_512bit, noreg); 8808 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8809 %} 8810 ins_pipe( pipe_slow ); 8811 %} 8812 8813 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8814 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8815 match(Set dst (VectorStoreMask mask size)); 8816 effect(TEMP_DEF dst); 8817 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8818 ins_encode %{ 8819 int dst_vlen_enc = vector_length_encoding(this); 8820 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8821 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8822 %} 8823 ins_pipe( pipe_slow ); 8824 %} 8825 8826 instruct vmaskcast_evex(kReg dst) %{ 8827 match(Set dst (VectorMaskCast dst)); 8828 ins_cost(0); 8829 format %{ "vector_mask_cast $dst" %} 8830 ins_encode %{ 8831 // empty 8832 %} 8833 ins_pipe(empty); 8834 %} 8835 8836 instruct vmaskcast(vec dst) %{ 8837 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8838 match(Set dst (VectorMaskCast dst)); 8839 ins_cost(0); 8840 format %{ "vector_mask_cast $dst" %} 8841 ins_encode %{ 8842 // empty 8843 %} 8844 ins_pipe(empty); 8845 %} 8846 8847 instruct vmaskcast_avx(vec dst, vec src) %{ 8848 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8849 match(Set dst (VectorMaskCast src)); 8850 format %{ "vector_mask_cast $dst, $src" %} 8851 ins_encode %{ 8852 int vlen = Matcher::vector_length(this); 8853 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8854 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8855 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8856 %} 8857 ins_pipe(pipe_slow); 8858 %} 8859 8860 //-------------------------------- Load Iota Indices ---------------------------------- 8861 8862 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8863 match(Set dst (VectorLoadConst src)); 8864 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8865 ins_encode %{ 8866 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8867 BasicType bt = Matcher::vector_element_basic_type(this); 8868 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8869 %} 8870 ins_pipe( pipe_slow ); 8871 %} 8872 8873 #ifdef _LP64 8874 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8875 match(Set dst (PopulateIndex src1 src2)); 8876 effect(TEMP dst, TEMP vtmp); 8877 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8878 ins_encode %{ 8879 assert($src2$$constant == 1, "required"); 8880 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8881 int vlen_enc = vector_length_encoding(this); 8882 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8883 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8884 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8885 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8886 %} 8887 ins_pipe( pipe_slow ); 8888 %} 8889 8890 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8891 match(Set dst (PopulateIndex src1 src2)); 8892 effect(TEMP dst, TEMP vtmp); 8893 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8894 ins_encode %{ 8895 assert($src2$$constant == 1, "required"); 8896 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8897 int vlen_enc = vector_length_encoding(this); 8898 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8899 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8900 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8901 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8902 %} 8903 ins_pipe( pipe_slow ); 8904 %} 8905 #endif 8906 //-------------------------------- Rearrange ---------------------------------- 8907 8908 // LoadShuffle/Rearrange for Byte 8909 instruct rearrangeB(vec dst, vec shuffle) %{ 8910 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8911 Matcher::vector_length(n) < 32); 8912 match(Set dst (VectorRearrange dst shuffle)); 8913 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8914 ins_encode %{ 8915 assert(UseSSE >= 4, "required"); 8916 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8917 %} 8918 ins_pipe( pipe_slow ); 8919 %} 8920 8921 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8922 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8923 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8924 match(Set dst (VectorRearrange src shuffle)); 8925 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8926 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8927 ins_encode %{ 8928 assert(UseAVX >= 2, "required"); 8929 // Swap src into vtmp1 8930 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8931 // Shuffle swapped src to get entries from other 128 bit lane 8932 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8933 // Shuffle original src to get entries from self 128 bit lane 8934 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8935 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8936 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8937 // Perform the blend 8938 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8939 %} 8940 ins_pipe( pipe_slow ); 8941 %} 8942 8943 8944 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8945 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8946 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8947 match(Set dst (VectorRearrange src shuffle)); 8948 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8949 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8950 ins_encode %{ 8951 int vlen_enc = vector_length_encoding(this); 8952 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8953 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8954 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8955 %} 8956 ins_pipe( pipe_slow ); 8957 %} 8958 8959 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8960 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8961 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8962 match(Set dst (VectorRearrange src shuffle)); 8963 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8964 ins_encode %{ 8965 int vlen_enc = vector_length_encoding(this); 8966 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8967 %} 8968 ins_pipe( pipe_slow ); 8969 %} 8970 8971 // LoadShuffle/Rearrange for Short 8972 8973 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8974 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8975 !VM_Version::supports_avx512bw()); 8976 match(Set dst (VectorLoadShuffle src)); 8977 effect(TEMP dst, TEMP vtmp); 8978 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8979 ins_encode %{ 8980 // Create a byte shuffle mask from short shuffle mask 8981 // only byte shuffle instruction available on these platforms 8982 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8983 if (UseAVX == 0) { 8984 assert(vlen_in_bytes <= 16, "required"); 8985 // Multiply each shuffle by two to get byte index 8986 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 8987 __ psllw($vtmp$$XMMRegister, 1); 8988 8989 // Duplicate to create 2 copies of byte index 8990 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8991 __ psllw($dst$$XMMRegister, 8); 8992 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8993 8994 // Add one to get alternate byte index 8995 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8996 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8997 } else { 8998 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8999 int vlen_enc = vector_length_encoding(this); 9000 // Multiply each shuffle by two to get byte index 9001 __ vpsllw($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 9002 9003 // Duplicate to create 2 copies of byte index 9004 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 9005 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9006 9007 // Add one to get alternate byte index 9008 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 9009 } 9010 %} 9011 ins_pipe( pipe_slow ); 9012 %} 9013 9014 instruct rearrangeS(vec dst, vec shuffle) %{ 9015 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9016 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 9017 match(Set dst (VectorRearrange dst shuffle)); 9018 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9019 ins_encode %{ 9020 assert(UseSSE >= 4, "required"); 9021 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9022 %} 9023 ins_pipe( pipe_slow ); 9024 %} 9025 9026 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 9027 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9028 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 9029 match(Set dst (VectorRearrange src shuffle)); 9030 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 9031 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 9032 ins_encode %{ 9033 assert(UseAVX >= 2, "required"); 9034 // Swap src into vtmp1 9035 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 9036 // Shuffle swapped src to get entries from other 128 bit lane 9037 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 9038 // Shuffle original src to get entries from self 128 bit lane 9039 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 9040 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 9041 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 9042 // Perform the blend 9043 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 9044 %} 9045 ins_pipe( pipe_slow ); 9046 %} 9047 9048 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 9049 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 9050 VM_Version::supports_avx512bw()); 9051 match(Set dst (VectorRearrange src shuffle)); 9052 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9053 ins_encode %{ 9054 int vlen_enc = vector_length_encoding(this); 9055 if (!VM_Version::supports_avx512vl()) { 9056 vlen_enc = Assembler::AVX_512bit; 9057 } 9058 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9059 %} 9060 ins_pipe( pipe_slow ); 9061 %} 9062 9063 // LoadShuffle/Rearrange for Integer and Float 9064 9065 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 9066 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9067 Matcher::vector_length(n) == 4 && UseAVX == 0); 9068 match(Set dst (VectorLoadShuffle src)); 9069 effect(TEMP dst, TEMP vtmp); 9070 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9071 ins_encode %{ 9072 assert(UseSSE >= 4, "required"); 9073 9074 // Create a byte shuffle mask from int shuffle mask 9075 // only byte shuffle instruction available on these platforms 9076 9077 // Duplicate and multiply each shuffle by 4 9078 __ movdqu($vtmp$$XMMRegister, $src$$XMMRegister); 9079 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9080 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 9081 __ psllw($vtmp$$XMMRegister, 2); 9082 9083 // Duplicate again to create 4 copies of byte index 9084 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 9085 __ psllw($dst$$XMMRegister, 8); 9086 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 9087 9088 // Add 3,2,1,0 to get alternate byte index 9089 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 9090 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 9091 %} 9092 ins_pipe( pipe_slow ); 9093 %} 9094 9095 instruct rearrangeI(vec dst, vec shuffle) %{ 9096 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9097 UseAVX == 0); 9098 match(Set dst (VectorRearrange dst shuffle)); 9099 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 9100 ins_encode %{ 9101 assert(UseSSE >= 4, "required"); 9102 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 9103 %} 9104 ins_pipe( pipe_slow ); 9105 %} 9106 9107 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 9108 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 9109 UseAVX > 0); 9110 match(Set dst (VectorRearrange src shuffle)); 9111 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9112 ins_encode %{ 9113 int vlen_enc = vector_length_encoding(this); 9114 BasicType bt = Matcher::vector_element_basic_type(this); 9115 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9116 %} 9117 ins_pipe( pipe_slow ); 9118 %} 9119 9120 // LoadShuffle/Rearrange for Long and Double 9121 9122 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 9123 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9124 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9125 match(Set dst (VectorLoadShuffle src)); 9126 effect(TEMP dst, TEMP vtmp); 9127 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 9128 ins_encode %{ 9129 assert(UseAVX >= 2, "required"); 9130 9131 int vlen_enc = vector_length_encoding(this); 9132 // Create a double word shuffle mask from long shuffle mask 9133 // only double word shuffle instruction available on these platforms 9134 9135 // Multiply each shuffle by two to get double word index 9136 __ vpsllq($vtmp$$XMMRegister, $src$$XMMRegister, 1, vlen_enc); 9137 9138 // Duplicate each double word shuffle 9139 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9140 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9141 9142 // Add one to get alternate double word index 9143 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9144 %} 9145 ins_pipe( pipe_slow ); 9146 %} 9147 9148 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9149 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9150 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9151 match(Set dst (VectorRearrange src shuffle)); 9152 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9153 ins_encode %{ 9154 assert(UseAVX >= 2, "required"); 9155 9156 int vlen_enc = vector_length_encoding(this); 9157 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9158 %} 9159 ins_pipe( pipe_slow ); 9160 %} 9161 9162 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9163 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9164 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9165 match(Set dst (VectorRearrange src shuffle)); 9166 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9167 ins_encode %{ 9168 assert(UseAVX > 2, "required"); 9169 9170 int vlen_enc = vector_length_encoding(this); 9171 if (vlen_enc == Assembler::AVX_128bit) { 9172 vlen_enc = Assembler::AVX_256bit; 9173 } 9174 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9175 %} 9176 ins_pipe( pipe_slow ); 9177 %} 9178 9179 // --------------------------------- FMA -------------------------------------- 9180 // a * b + c 9181 9182 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9183 match(Set c (FmaVF c (Binary a b))); 9184 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9185 ins_cost(150); 9186 ins_encode %{ 9187 assert(UseFMA, "not enabled"); 9188 int vlen_enc = vector_length_encoding(this); 9189 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9190 %} 9191 ins_pipe( pipe_slow ); 9192 %} 9193 9194 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9195 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9196 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9197 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9198 ins_cost(150); 9199 ins_encode %{ 9200 assert(UseFMA, "not enabled"); 9201 int vlen_enc = vector_length_encoding(this); 9202 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9203 %} 9204 ins_pipe( pipe_slow ); 9205 %} 9206 9207 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9208 match(Set c (FmaVD c (Binary a b))); 9209 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9210 ins_cost(150); 9211 ins_encode %{ 9212 assert(UseFMA, "not enabled"); 9213 int vlen_enc = vector_length_encoding(this); 9214 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9215 %} 9216 ins_pipe( pipe_slow ); 9217 %} 
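// Note: the FmaVF/FmaVD rules above and below compute a * b + c per lane with a single rounding.
// A minimal, hypothetical Java-level sketch (not part of this file) of the loop shape that can be
// auto-vectorized into these rules, assuming -XX:+UseFMA on hardware with FMA support:
//
//   for (int i = 0; i < n; i++) {
//     c[i] = Math.fma(a[i], b[i], c[i]);
//   }
//
// The match rules read "Set c (FmaVF c (Binary a b))", so the addend $c also serves as the
// destination register, which is why the vfmaf/vfmad helpers are passed $c twice.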
9218 9219 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9220 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9221 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9222 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9223 ins_cost(150); 9224 ins_encode %{ 9225 assert(UseFMA, "not enabled"); 9226 int vlen_enc = vector_length_encoding(this); 9227 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9228 %} 9229 ins_pipe( pipe_slow ); 9230 %} 9231 9232 // --------------------------------- Vector Multiply Add -------------------------------------- 9233 9234 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9235 predicate(UseAVX == 0); 9236 match(Set dst (MulAddVS2VI dst src1)); 9237 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9238 ins_encode %{ 9239 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9240 %} 9241 ins_pipe( pipe_slow ); 9242 %} 9243 9244 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9245 predicate(UseAVX > 0); 9246 match(Set dst (MulAddVS2VI src1 src2)); 9247 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9248 ins_encode %{ 9249 int vlen_enc = vector_length_encoding(this); 9250 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9256 9257 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9258 predicate(VM_Version::supports_avx512_vnni()); 9259 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9260 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 9261 ins_encode %{ 9262 assert(UseAVX > 2, "required"); 9263 int vlen_enc = vector_length_encoding(this); 9264 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9265 %} 9266 ins_pipe( pipe_slow ); 9267 ins_cost(10); 9268 %} 9269 9270 // --------------------------------- PopCount -------------------------------------- 9271 9272 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9273 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9274 match(Set dst (PopCountVI src)); 9275 match(Set dst (PopCountVL src)); 9276 format %{ "vector_popcount_integral $dst, $src" %} 9277 ins_encode %{ 9278 int opcode = this->ideal_Opcode(); 9279 int vlen_enc = vector_length_encoding(this, $src); 9280 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9281 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9282 %} 9283 ins_pipe( pipe_slow ); 9284 %} 9285 9286 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9287 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9288 match(Set dst (PopCountVI src mask)); 9289 match(Set dst (PopCountVL src mask)); 9290 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9291 ins_encode %{ 9292 int vlen_enc = vector_length_encoding(this, $src); 9293 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9294 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9295 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9296 %} 9297 ins_pipe( pipe_slow ); 9298 %} 9299 9300 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9301 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9302 
match(Set dst (PopCountVI src)); 9303 match(Set dst (PopCountVL src)); 9304 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9305 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9306 ins_encode %{ 9307 int opcode = this->ideal_Opcode(); 9308 int vlen_enc = vector_length_encoding(this, $src); 9309 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9310 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9311 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9312 %} 9313 ins_pipe( pipe_slow ); 9314 %} 9315 9316 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9317 9318 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9319 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9320 Matcher::vector_length_in_bytes(n->in(1)))); 9321 match(Set dst (CountTrailingZerosV src)); 9322 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9323 ins_cost(400); 9324 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9325 ins_encode %{ 9326 int vlen_enc = vector_length_encoding(this, $src); 9327 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9328 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9329 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9330 %} 9331 ins_pipe( pipe_slow ); 9332 %} 9333 9334 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9335 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9336 VM_Version::supports_avx512cd() && 9337 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9338 match(Set dst (CountTrailingZerosV src)); 9339 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9340 ins_cost(400); 9341 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9342 ins_encode %{ 9343 int vlen_enc = vector_length_encoding(this, $src); 9344 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9345 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9346 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9347 %} 9348 ins_pipe( pipe_slow ); 9349 %} 9350 9351 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9352 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9353 match(Set dst (CountTrailingZerosV src)); 9354 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9355 ins_cost(400); 9356 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9357 ins_encode %{ 9358 int vlen_enc = vector_length_encoding(this, $src); 9359 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9360 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9361 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9362 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9363 %} 9364 ins_pipe( pipe_slow ); 9365 %} 9366 9367 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9368 
predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9369 match(Set dst (CountTrailingZerosV src)); 9370 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9371 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9372 ins_encode %{ 9373 int vlen_enc = vector_length_encoding(this, $src); 9374 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9375 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9376 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9377 %} 9378 ins_pipe( pipe_slow ); 9379 %} 9380 9381 9382 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9383 9384 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9385 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9386 effect(TEMP dst); 9387 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9388 ins_encode %{ 9389 int vector_len = vector_length_encoding(this); 9390 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9391 %} 9392 ins_pipe( pipe_slow ); 9393 %} 9394 9395 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9396 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9397 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9398 effect(TEMP dst); 9399 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9400 ins_encode %{ 9401 int vector_len = vector_length_encoding(this); 9402 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9403 %} 9404 ins_pipe( pipe_slow ); 9405 %} 9406 9407 // --------------------------------- Rotation Operations ---------------------------------- 9408 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9409 match(Set dst (RotateLeftV src shift)); 9410 match(Set dst (RotateRightV src shift)); 9411 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9412 ins_encode %{ 9413 int opcode = this->ideal_Opcode(); 9414 int vector_len = vector_length_encoding(this); 9415 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9416 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9417 %} 9418 ins_pipe( pipe_slow ); 9419 %} 9420 9421 instruct vprorate(vec dst, vec src, vec shift) %{ 9422 match(Set dst (RotateLeftV src shift)); 9423 match(Set dst (RotateRightV src shift)); 9424 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9425 ins_encode %{ 9426 int opcode = this->ideal_Opcode(); 9427 int vector_len = vector_length_encoding(this); 9428 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9429 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9430 %} 9431 ins_pipe( pipe_slow ); 9432 %} 9433 9434 // ---------------------------------- Masked Operations ------------------------------------ 9435 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9436 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9437 match(Set dst (LoadVectorMasked mem mask)); 9438 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9439 ins_encode %{ 9440 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9441 int vlen_enc = vector_length_encoding(this); 9442 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9443 %} 9444 ins_pipe( pipe_slow ); 9445 %} 9446 9447 9448 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9449 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9450 match(Set dst (LoadVectorMasked mem mask)); 9451 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9452 ins_encode %{ 9453 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9454 int vector_len = vector_length_encoding(this); 9455 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9456 %} 9457 ins_pipe( pipe_slow ); 9458 %} 9459 9460 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9461 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9462 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9463 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9464 ins_encode %{ 9465 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9466 int vlen_enc = vector_length_encoding(src_node); 9467 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9468 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9469 %} 9470 ins_pipe( pipe_slow ); 9471 %} 9472 9473 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9474 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9475 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9476 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9477 ins_encode %{ 9478 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9479 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9480 int vlen_enc = vector_length_encoding(src_node); 9481 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9482 %} 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 #ifdef _LP64 9487 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9488 match(Set addr (VerifyVectorAlignment addr mask)); 9489 effect(KILL cr); 9490 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9491 ins_encode %{ 9492 Label Lskip; 9493 // check if masked bits of addr are zero 9494 __ testq($addr$$Register, $mask$$constant); 9495 __ jccb(Assembler::equal, Lskip); 9496 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9497 __ bind(Lskip); 9498 %} 9499 ins_pipe(pipe_slow); 9500 %} 9501 9502 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9503 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9504 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9505 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9506 ins_encode %{ 9507 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9508 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9509 9510 Label DONE; 9511 int vlen_enc = vector_length_encoding(this, $src1); 9512 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9513 9514 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9515 __ mov64($dst$$Register, -1L); 9516 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9517 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9518 __ jccb(Assembler::carrySet, DONE); 9519 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9520 __ notq($dst$$Register); 9521 __ tzcntq($dst$$Register, $dst$$Register); 9522 __ bind(DONE); 9523 %} 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 9528 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9529 match(Set dst (VectorMaskGen len)); 9530 effect(TEMP temp, KILL cr); 9531 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9532 ins_encode %{ 9533 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9534 %} 9535 ins_pipe( pipe_slow ); 9536 %} 9537 9538 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9539 match(Set dst (VectorMaskGen len)); 9540 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9541 effect(TEMP temp); 9542 ins_encode %{ 9543 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9544 __ kmovql($dst$$KRegister, $temp$$Register); 9545 %} 9546 ins_pipe( pipe_slow ); 9547 %} 9548 9549 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9550 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9551 match(Set dst (VectorMaskToLong mask)); 9552 effect(TEMP dst, KILL cr); 9553 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9554 ins_encode %{ 9555 int opcode = this->ideal_Opcode(); 9556 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9557 int mask_len = Matcher::vector_length(this, $mask); 9558 int mask_size = mask_len * type2aelembytes(mbt); 9559 int vlen_enc = vector_length_encoding(this, $mask); 9560 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9561 $dst$$Register, mask_len, mask_size, vlen_enc); 9562 %} 9563 ins_pipe( pipe_slow ); 9564 %} 9565 9566 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9567 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9568 match(Set dst (VectorMaskToLong mask)); 9569 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9570 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9571 ins_encode %{ 9572 int opcode = this->ideal_Opcode(); 9573 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9574 int mask_len = Matcher::vector_length(this, $mask); 9575 int vlen_enc = vector_length_encoding(this, $mask); 9576 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9577 $dst$$Register, mask_len, mbt, vlen_enc); 9578 %} 9579 ins_pipe( pipe_slow ); 9580 %} 9581 9582 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9583 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9584 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9585 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9586 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9587 ins_encode %{ 9588 int opcode = this->ideal_Opcode(); 9589 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9590 int mask_len = Matcher::vector_length(this, $mask); 9591 int vlen_enc = vector_length_encoding(this, $mask); 9592 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9593 $dst$$Register, mask_len, mbt, vlen_enc); 9594 %} 9595 ins_pipe( pipe_slow ); 9596 %} 9597 9598 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9599 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9600 match(Set dst (VectorMaskTrueCount mask)); 9601 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9602 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9603 ins_encode %{ 9604 int opcode = this->ideal_Opcode(); 9605 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9606 int mask_len = Matcher::vector_length(this, $mask); 9607 int mask_size = mask_len * type2aelembytes(mbt); 9608 int vlen_enc = vector_length_encoding(this, $mask); 9609 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9610 $tmp$$Register, mask_len, mask_size, vlen_enc); 9611 %} 9612 ins_pipe( pipe_slow ); 9613 %} 9614 9615 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9616 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9617 match(Set dst (VectorMaskTrueCount mask)); 9618 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9619 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9620 ins_encode %{ 9621 int opcode = this->ideal_Opcode(); 9622 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9623 int mask_len = Matcher::vector_length(this, $mask); 9624 int vlen_enc = vector_length_encoding(this, $mask); 9625 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9626 $tmp$$Register, mask_len, mbt, vlen_enc); 9627 %} 9628 ins_pipe( pipe_slow ); 9629 %} 9630 9631 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9632 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9633 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9634 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9635 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9636 ins_encode %{ 9637 int opcode = this->ideal_Opcode(); 9638 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9639 int mask_len = Matcher::vector_length(this, $mask); 9640 int vlen_enc = vector_length_encoding(this, $mask); 9641 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9642 $tmp$$Register, mask_len, mbt, vlen_enc); 9643 %} 9644 ins_pipe( pipe_slow ); 9645 %} 9646 9647 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9648 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9649 match(Set dst (VectorMaskFirstTrue mask)); 9650 match(Set dst (VectorMaskLastTrue mask)); 9651 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9652 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9653 ins_encode %{ 9654 int opcode = this->ideal_Opcode(); 9655 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9656 int mask_len = Matcher::vector_length(this, $mask); 9657 int mask_size = mask_len * type2aelembytes(mbt); 9658 int vlen_enc = vector_length_encoding(this, $mask); 9659 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9660 $tmp$$Register, mask_len, mask_size, vlen_enc); 9661 %} 9662 ins_pipe( pipe_slow ); 9663 %} 9664 9665 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9666 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9667 match(Set dst (VectorMaskFirstTrue mask)); 9668 match(Set dst (VectorMaskLastTrue mask)); 9669 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9670 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9671 ins_encode %{ 9672 int opcode = this->ideal_Opcode(); 9673 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9674 int mask_len = Matcher::vector_length(this, $mask); 9675 int vlen_enc = vector_length_encoding(this, $mask); 9676 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9677 $tmp$$Register, mask_len, mbt, vlen_enc); 9678 %} 9679 ins_pipe( pipe_slow ); 9680 %} 9681 9682 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9683 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9684 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9685 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9686 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9687 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9688 ins_encode %{ 9689 int opcode = this->ideal_Opcode(); 9690 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9691 int mask_len = Matcher::vector_length(this, $mask); 9692 int vlen_enc = vector_length_encoding(this, $mask); 9693 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9694 $tmp$$Register, mask_len, mbt, vlen_enc); 9695 %} 9696 ins_pipe( pipe_slow ); 9697 %} 9698 9699 // --------------------------------- Compress/Expand Operations --------------------------- 9700 #ifdef _LP64 9701 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9702 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9703 match(Set dst (CompressV src mask)); 9704 match(Set dst (ExpandV src mask)); 9705 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9706 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9707 ins_encode %{ 9708 int opcode = this->ideal_Opcode(); 9709 int vlen_enc = vector_length_encoding(this); 9710 BasicType bt = Matcher::vector_element_basic_type(this); 9711 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9712 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9713 %} 9714 ins_pipe( pipe_slow ); 9715 %} 9716 #endif 9717 9718 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9719 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9720 match(Set dst (CompressV src mask)); 9721 match(Set dst (ExpandV src mask)); 9722 format %{ "vector_compress_expand $dst, $src, $mask" %} 9723 ins_encode %{ 9724 int opcode = this->ideal_Opcode(); 9725 int vector_len = vector_length_encoding(this); 9726 BasicType bt = Matcher::vector_element_basic_type(this); 9727 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9728 %} 9729 ins_pipe( pipe_slow ); 9730 %} 9731 9732 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9733 match(Set dst (CompressM mask)); 9734 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9735 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9736 ins_encode %{ 9737 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9738 int mask_len = Matcher::vector_length(this); 9739 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9740 %} 9741 ins_pipe( pipe_slow ); 9742 %} 9743 9744 #endif // _LP64 9745 9746 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9747 9748 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9749 predicate(!VM_Version::supports_gfni()); 9750 match(Set dst (ReverseV src)); 9751 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9752 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9753 ins_encode %{ 9754 int vec_enc = vector_length_encoding(this); 9755 BasicType bt = Matcher::vector_element_basic_type(this); 9756 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9757 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9758 %} 9759 ins_pipe( pipe_slow ); 9760 %} 9761 9762 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9763 predicate(VM_Version::supports_gfni()); 9764 match(Set dst (ReverseV src)); 9765 effect(TEMP dst, TEMP xtmp); 9766 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9767 ins_encode %{ 9768 int vec_enc = vector_length_encoding(this); 9769 BasicType bt = Matcher::vector_element_basic_type(this); 9770 InternalAddress addr = $constantaddress(jlong(0x8040201008040201)); 9771 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9772 $xtmp$$XMMRegister); 9773 %} 9774 ins_pipe( pipe_slow ); 9775 %} 9776 9777 instruct vreverse_byte_reg(vec dst, vec src) %{ 9778 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9779 match(Set dst (ReverseBytesV src)); 9780 effect(TEMP dst); 9781 format %{ "vector_reverse_byte $dst, $src" %} 9782 ins_encode %{ 9783 int vec_enc = vector_length_encoding(this); 9784 BasicType bt = Matcher::vector_element_basic_type(this); 9785 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9786 %} 9787 ins_pipe( pipe_slow ); 9788 %} 9789 9790 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9791 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9792 match(Set dst (ReverseBytesV src)); 9793 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9794 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9795 ins_encode %{ 9796 int vec_enc = vector_length_encoding(this); 9797 BasicType bt = Matcher::vector_element_basic_type(this); 9798 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9799 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9800 %} 9801 ins_pipe( pipe_slow ); 9802 %} 9803 9804 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9805 9806 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9807 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9808 Matcher::vector_length_in_bytes(n->in(1)))); 9809 match(Set dst (CountLeadingZerosV src)); 9810 format %{ "vector_count_leading_zeros $dst, $src" %} 9811 ins_encode %{ 9812 int vlen_enc = vector_length_encoding(this, $src); 9813 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9814 __ 

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
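
// Note: the masked instructs in this section defer to evmasked_op in the macro
// assembler. The boolean following the operands selects between merge-masking
// (true: lanes whose mask bit is clear keep the previous contents of $dst) and
// zero-masking (false: inactive lanes are cleared), mirroring the EVEX
// write-mask semantics.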

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
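
// Note: the masked shifts come in three flavours: an immediate count
// ($shift$$constant), a uniform count taken from a vector register, and a
// per-element variable count (predicated on is_var_shift()); the trailing
// boolean handed to evmasked_op distinguishes the variable-shift forms.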

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
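
// Note: the masked FMA forms below perform a fused multiply-add on the active
// lanes only (merge-masked); the UseFMA assert in the encoding guards against
// matching them when FMA has not been enabled.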

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the element basic type of the operands.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
"%s", type2name(src1_elem_bt)); break; 10450 } 10451 %} 10452 ins_pipe( pipe_slow ); 10453 %} 10454 10455 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10456 predicate(Matcher::vector_length(n) <= 32); 10457 match(Set dst (MaskAll src)); 10458 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10459 ins_encode %{ 10460 int mask_len = Matcher::vector_length(this); 10461 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10462 %} 10463 ins_pipe( pipe_slow ); 10464 %} 10465 10466 #ifdef _LP64 10467 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10468 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10469 match(Set dst (XorVMask src (MaskAll cnt))); 10470 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10471 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10472 ins_encode %{ 10473 uint masklen = Matcher::vector_length(this); 10474 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10475 %} 10476 ins_pipe( pipe_slow ); 10477 %} 10478 10479 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10480 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10481 (Matcher::vector_length(n) == 16) || 10482 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10483 match(Set dst (XorVMask src (MaskAll cnt))); 10484 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10485 ins_encode %{ 10486 uint masklen = Matcher::vector_length(this); 10487 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10488 %} 10489 ins_pipe( pipe_slow ); 10490 %} 10491 10492 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10493 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10494 match(Set dst (VectorLongToMask src)); 10495 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10496 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10497 ins_encode %{ 10498 int mask_len = Matcher::vector_length(this); 10499 int vec_enc = vector_length_encoding(mask_len); 10500 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10501 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10502 %} 10503 ins_pipe( pipe_slow ); 10504 %} 10505 10506 10507 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10508 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10509 match(Set dst (VectorLongToMask src)); 10510 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10511 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10512 ins_encode %{ 10513 int mask_len = Matcher::vector_length(this); 10514 assert(mask_len <= 32, "invalid mask length"); 10515 int vec_enc = vector_length_encoding(mask_len); 10516 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10517 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10518 %} 10519 ins_pipe( pipe_slow ); 10520 %} 10521 10522 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10523 predicate(n->bottom_type()->isa_vectmask()); 10524 match(Set dst (VectorLongToMask src)); 10525 format %{ "long_to_mask_evex $dst, $src\t!" 

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2 and $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
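
// Note: MacroLogicV carries an 8-bit immediate ($func) encoding a three-input
// truth table; vpternlog applies that table bitwise to dst/src2/src3, and the
// masked forms below merge the result into $dst under $mask.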

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_reg(vec dst, vec src1, vec src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}
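
// Note: sub-word element types can use the native packed saturating add/sub
// instructions (the vpadds/vpaddus family), as the instructs above do. There
// is no such instruction for int/long lanes, so the variants below synthesize
// the saturation with compare/blend sequences in the macro assembler, in
// separate EVEX and AVX flavours.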

instruct vector_addsub_saturating_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2);
  format %{ "vector_addsub_saturating_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_evex(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                                        $src1$$XMMRegister, $src2$$XMMRegister,
                                        $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                        $ktmp1$$KRegister, $ktmp2$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4);
  format %{ "vector_addsub_saturating_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_addsub_dq_saturating_avx(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                       $src2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
                                       $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp);
  format %{ "vector_add_saturating_unsigned_evex $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                              $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_add_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2, vec xtmp3)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingAddV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_add_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_add_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_evex(vec dst, vec src1, vec src2, kReg ktmp)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            (Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()));
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP ktmp);
  format %{ "vector_sub_saturating_unsigned_evex $dst, $src1, $src2 \t! using $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_evex(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister,
                                              $src2$$XMMRegister, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_sub_saturating_unsigned_reg_avx(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2)
%{
  predicate(!is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned() &&
            Matcher::vector_length_in_bytes(n) <= 32 && !VM_Version::supports_avx512vl());
  match(Set dst (SaturatingSubV src1 src2));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_sub_saturating_unsigned_avx $dst, $src1, $src2 \t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_sub_dq_saturating_unsigned_avx(elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
                                             $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, false, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_unsigned_subword_mem(vec dst, vec src1, memory src2)
%{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV src1 (LoadVector src2)));
  match(Set dst (SaturatingSubV src1 (LoadVector src2)));
  format %{ "vector_addsub_saturating_unsigned_subword $dst, $src1, $src2" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_saturating_op(this->ideal_Opcode(), elem_bt, $dst$$XMMRegister,
                            $src1$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_addsub_saturating_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_reg(vec dst, vec src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst src) mask));
  match(Set dst (SaturatingSubV (Binary dst src) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$XMMRegister, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && !n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, false, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_addsub_saturating_unsigned_subword_masked_mem(vec dst, memory src, kReg mask) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) &&
            n->is_SaturatingVector() && n->as_SaturatingVector()->is_unsigned());
  match(Set dst (SaturatingAddV (Binary dst (LoadVector src)) mask));
  match(Set dst (SaturatingSubV (Binary dst (LoadVector src)) mask));
  format %{ "vector_addsub_saturating_unsigned_subword_masked $dst, $mask, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evmasked_saturating_op(this->ideal_Opcode(), elem_bt, $mask$$KRegister, $dst$$XMMRegister,
                              $dst$$XMMRegister, $src$$Address, true, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}