//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
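//
// As a purely illustrative sketch (FOO is a hypothetical register, not one
// defined in this file), a 32-bit register with hardware encoding 0 that is
// save-on-call for both the allocator and the C convention and spills as an
// int would be described as:
//
//   reg_def FOO ( SOC, SOC, Op_RegI, 0, foo->as_VMReg());
//
// The XMM definitions below follow this pattern, with one reg_def per
// 32-bit word (a-p) of each 512-bit register.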
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM15 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
                                       xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);
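
// Note (explanatory, not part of the original comments): k0 is not defined
// or classed above. In EVEX encodings an opmask field of 0 means "no
// masking", so k0 cannot be handed out as an ordinary write-mask operand.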

// flags allocation class should be last.
alloc_class chunk3(RFLAGS);


// Singleton class for condition codes
reg_class int_flags(RFLAGS);

// Class for pre evex float registers
reg_class float_reg_legacy(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15
#endif
                           );

// Class for evex float registers
reg_class float_reg_evex(XMM0,
                         XMM1,
                         XMM2,
                         XMM3,
                         XMM4,
                         XMM5,
                         XMM6,
                         XMM7
#ifdef _LP64
                        ,XMM8,
                         XMM9,
                         XMM10,
                         XMM11,
                         XMM12,
                         XMM13,
                         XMM14,
                         XMM15,
                         XMM16,
                         XMM17,
                         XMM18,
                         XMM19,
                         XMM20,
                         XMM21,
                         XMM22,
                         XMM23,
                         XMM24,
                         XMM25,
                         XMM26,
                         XMM27,
                         XMM28,
                         XMM29,
                         XMM30,
                         XMM31
#endif
                         );

reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex double registers
reg_class double_reg_legacy(XMM0, XMM0b,
                            XMM1, XMM1b,
                            XMM2, XMM2b,
                            XMM3, XMM3b,
                            XMM4, XMM4b,
                            XMM5, XMM5b,
                            XMM6, XMM6b,
                            XMM7, XMM7b
#ifdef _LP64
                           ,XMM8, XMM8b,
                            XMM9, XMM9b,
                            XMM10, XMM10b,
                            XMM11, XMM11b,
                            XMM12, XMM12b,
                            XMM13, XMM13b,
                            XMM14, XMM14b,
                            XMM15, XMM15b
#endif
                            );

// Class for evex double registers
reg_class double_reg_evex(XMM0, XMM0b,
                          XMM1, XMM1b,
                          XMM2, XMM2b,
                          XMM3, XMM3b,
                          XMM4, XMM4b,
                          XMM5, XMM5b,
                          XMM6, XMM6b,
                          XMM7, XMM7b
#ifdef _LP64
                         ,XMM8, XMM8b,
                          XMM9, XMM9b,
                          XMM10, XMM10b,
                          XMM11, XMM11b,
                          XMM12, XMM12b,
                          XMM13, XMM13b,
                          XMM14, XMM14b,
                          XMM15, XMM15b,
                          XMM16, XMM16b,
                          XMM17, XMM17b,
                          XMM18, XMM18b,
                          XMM19, XMM19b,
                          XMM20, XMM20b,
                          XMM21, XMM21b,
                          XMM22, XMM22b,
                          XMM23, XMM23b,
                          XMM24, XMM24b,
                          XMM25, XMM25b,
                          XMM26, XMM26b,
                          XMM27, XMM27b,
                          XMM28, XMM28b,
                          XMM29, XMM29b,
                          XMM30, XMM30b,
                          XMM31, XMM31b
#endif
                          );

reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );

// Class for pre evex 32bit vector registers
reg_class vectors_reg_legacy(XMM0,
                             XMM1,
                             XMM2,
                             XMM3,
                             XMM4,
                             XMM5,
                             XMM6,
                             XMM7
#ifdef _LP64
                            ,XMM8,
                             XMM9,
                             XMM10,
                             XMM11,
                             XMM12,
                             XMM13,
                             XMM14,
                             XMM15
#endif
                             );

// Class for evex 32bit vector registers
reg_class vectors_reg_evex(XMM0,
                           XMM1,
                           XMM2,
                           XMM3,
                           XMM4,
                           XMM5,
                           XMM6,
                           XMM7
#ifdef _LP64
                          ,XMM8,
                           XMM9,
                           XMM10,
                           XMM11,
                           XMM12,
                           XMM13,
                           XMM14,
                           XMM15,
                           XMM16,
                           XMM17,
                           XMM18,
                           XMM19,
                           XMM20,
                           XMM21,
                           XMM22,
                           XMM23,
                           XMM24,
                           XMM25,
                           XMM26,
                           XMM27,
                           XMM28,
                           XMM29,
                           XMM30,
                           XMM31
#endif
                           );

reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );

// Class for all 64bit vector registers
reg_class vectord_reg_legacy(XMM0, XMM0b,
                             XMM1, XMM1b,
                             XMM2, XMM2b,
                             XMM3, XMM3b,
                             XMM4, XMM4b,
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
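// For example (an illustrative note, consistent with the definitions below):
// HandlerImpl is declared in this source_hpp block so that code outside the
// generated ad file, such as output.cpp when it budgets code-section space for
// the handlers, can use size_exception_handler()/size_deopt_handler(), while
// the corresponding emit_* bodies live in the source %{ }% block further down.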

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---<  Used for optimization in Compile::shorten_branches  >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(C2_MacroAssembler *masm);
  static int emit_deopt_handler(C2_MacroAssembler* masm);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum    = Node::_last_flag << 1,
    Flag_sets_carry_flag      = Node::_last_flag << 2,
    Flag_sets_parity_flag     = Node::_last_flag << 3,
    Flag_sets_zero_flag       = Node::_last_flag << 4,
    Flag_sets_overflow_flag   = Node::_last_flag << 5,
    Flag_sets_sign_flag       = Node::_last_flag << 6,
    Flag_clears_carry_flag    = Node::_last_flag << 7,
    Flag_clears_parity_flag   = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // push a "the_pc" on the stack without destroying any registers 1341 // as they all may be live. 
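  // Illustrative arithmetic for the sequence below: a disp32 call encodes in
  // 5 bytes, so the value it pushes is the_pc + 5 (the address of "next");
  // the subptr then subtracts (__ offset() - offset), i.e. exactly the bytes
  // emitted since the_pc, so the stack slot ends up holding the_pc itself.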
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 
if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 
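// A worked example of the size guard used below: a 256-bit vector of ints has
// vlen == 8, so size_in_bits == 8 * 4 * 8 == 256; opcodes such as
// Op_VectorLoadShuffle then reject that shape unless UseAVX >= 2.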
1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_MaxV: 1769 case Op_MinV: 1770 if (UseSSE < 4 && is_integral_type(bt)) { 1771 return false; 1772 } 1773 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1774 // Float/Double intrinsics are enabled for AVX family currently. 
1775 if (UseAVX == 0) { 1776 return false; 1777 } 1778 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1779 return false; 1780 } 1781 } 1782 break; 1783 case Op_CallLeafVector: 1784 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1785 return false; 1786 } 1787 break; 1788 case Op_AddReductionVI: 1789 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1790 return false; 1791 } 1792 // fallthrough 1793 case Op_AndReductionV: 1794 case Op_OrReductionV: 1795 case Op_XorReductionV: 1796 if (is_subword_type(bt) && (UseSSE < 4)) { 1797 return false; 1798 } 1799 #ifndef _LP64 1800 if (bt == T_BYTE || bt == T_LONG) { 1801 return false; 1802 } 1803 #endif 1804 break; 1805 #ifndef _LP64 1806 case Op_VectorInsert: 1807 if (bt == T_LONG || bt == T_DOUBLE) { 1808 return false; 1809 } 1810 break; 1811 #endif 1812 case Op_MinReductionV: 1813 case Op_MaxReductionV: 1814 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1815 return false; 1816 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1817 return false; 1818 } 1819 // Float/Double intrinsics enabled for AVX family. 1820 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1821 return false; 1822 } 1823 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1824 return false; 1825 } 1826 #ifndef _LP64 1827 if (bt == T_BYTE || bt == T_LONG) { 1828 return false; 1829 } 1830 #endif 1831 break; 1832 case Op_VectorTest: 1833 if (UseSSE < 4) { 1834 return false; // Implementation limitation 1835 } else if (size_in_bits < 32) { 1836 return false; // Implementation limitation 1837 } 1838 break; 1839 case Op_VectorLoadShuffle: 1840 case Op_VectorRearrange: 1841 if(vlen == 2) { 1842 return false; // Implementation limitation due to how shuffle is loaded 1843 } else if (size_in_bits == 256 && UseAVX < 2) { 1844 return false; // Implementation limitation 1845 } 1846 break; 1847 case Op_VectorLoadMask: 1848 case Op_VectorMaskCast: 1849 if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 // fallthrough 1853 case Op_VectorStoreMask: 1854 if (vlen == 2) { 1855 return false; // Implementation limitation 1856 } 1857 break; 1858 case Op_PopulateIndex: 1859 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1860 return false; 1861 } 1862 break; 1863 case Op_VectorCastB2X: 1864 case Op_VectorCastS2X: 1865 case Op_VectorCastI2X: 1866 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1867 return false; 1868 } 1869 break; 1870 case Op_VectorCastL2X: 1871 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1872 return false; 1873 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1874 return false; 1875 } 1876 break; 1877 case Op_VectorCastF2X: { 1878 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1879 // happen after intermediate conversion to integer and special handling 1880 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
1881 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1882 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1883 return false; 1884 } 1885 } 1886 // fallthrough 1887 case Op_VectorCastD2X: 1888 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1889 return false; 1890 } 1891 break; 1892 case Op_VectorCastF2HF: 1893 case Op_VectorCastHF2F: 1894 if (!VM_Version::supports_f16c() && 1895 ((!VM_Version::supports_evex() || 1896 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1897 return false; 1898 } 1899 break; 1900 case Op_RoundVD: 1901 if (!VM_Version::supports_avx512dq()) { 1902 return false; 1903 } 1904 break; 1905 case Op_MulReductionVI: 1906 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1907 return false; 1908 } 1909 break; 1910 case Op_LoadVectorGatherMasked: 1911 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1912 return false; 1913 } 1914 if (is_subword_type(bt) && 1915 (!is_LP64 || 1916 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1917 (size_in_bits < 64) || 1918 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1919 return false; 1920 } 1921 break; 1922 case Op_StoreVectorScatterMasked: 1923 case Op_StoreVectorScatter: 1924 if (is_subword_type(bt)) { 1925 return false; 1926 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 // fallthrough 1930 case Op_LoadVectorGather: 1931 if (!is_subword_type(bt) && size_in_bits == 64) { 1932 return false; 1933 } 1934 if (is_subword_type(bt) && size_in_bits < 64) { 1935 return false; 1936 } 1937 break; 1938 case Op_SelectFromTwoVector: 1939 if (size_in_bits < 128 || (size_in_bits < 512 && !VM_Version::supports_avx512vl())) { 1940 return false; 1941 } 1942 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 1943 return false; 1944 } 1945 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 1946 return false; 1947 } 1948 if ((bt == T_INT || bt == T_FLOAT || bt == T_DOUBLE) && !VM_Version::supports_evex()) { 1949 return false; 1950 } 1951 break; 1952 case Op_MaskAll: 1953 if (!VM_Version::supports_evex()) { 1954 return false; 1955 } 1956 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1957 return false; 1958 } 1959 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1960 return false; 1961 } 1962 break; 1963 case Op_VectorMaskCmp: 1964 if (vlen < 2 || size_in_bits < 32) { 1965 return false; 1966 } 1967 break; 1968 case Op_CompressM: 1969 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1970 return false; 1971 } 1972 break; 1973 case Op_CompressV: 1974 case Op_ExpandV: 1975 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1976 return false; 1977 } 1978 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1979 return false; 1980 } 1981 if (size_in_bits < 128 ) { 1982 return false; 1983 } 1984 case Op_VectorLongToMask: 1985 if (UseAVX < 1 || !is_LP64) { 1986 return false; 1987 } 1988 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1989 return false; 1990 } 1991 break; 1992 case Op_SignumVD: 1993 case Op_SignumVF: 1994 if (UseAVX < 1) { 1995 return false; 1996 } 1997 break; 1998 case Op_PopCountVI: 1999 case Op_PopCountVL: { 2000 if (!is_pop_count_instr_target(bt) && 2001 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 2002 return false; 2003 } 2004 } 2005 break; 2006 case Op_ReverseV: 2007 case Op_ReverseBytesV: 2008 if (UseAVX < 2) { 2009 return false; 
2010 } 2011 break; 2012 case Op_CountTrailingZerosV: 2013 case Op_CountLeadingZerosV: 2014 if (UseAVX < 2) { 2015 return false; 2016 } 2017 break; 2018 } 2019 return true; // Per default match rules are supported. 2020 } 2021 2022 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2023 // ADLC based match_rule_supported routine checks for the existence of pattern based 2024 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2025 // of their non-masked counterpart with mask edge being the differentiator. 2026 // This routine does a strict check on the existence of masked operation patterns 2027 // by returning a default false value for all the other opcodes apart from the 2028 // ones whose masked instruction patterns are defined in this file. 2029 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2030 return false; 2031 } 2032 2033 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2034 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2035 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2036 return false; 2037 } 2038 switch(opcode) { 2039 // Unary masked operations 2040 case Op_AbsVB: 2041 case Op_AbsVS: 2042 if(!VM_Version::supports_avx512bw()) { 2043 return false; // Implementation limitation 2044 } 2045 case Op_AbsVI: 2046 case Op_AbsVL: 2047 return true; 2048 2049 // Ternary masked operations 2050 case Op_FmaVF: 2051 case Op_FmaVD: 2052 return true; 2053 2054 case Op_MacroLogicV: 2055 if(bt != T_INT && bt != T_LONG) { 2056 return false; 2057 } 2058 return true; 2059 2060 // Binary masked operations 2061 case Op_AddVB: 2062 case Op_AddVS: 2063 case Op_SubVB: 2064 case Op_SubVS: 2065 case Op_MulVS: 2066 case Op_LShiftVS: 2067 case Op_RShiftVS: 2068 case Op_URShiftVS: 2069 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2070 if (!VM_Version::supports_avx512bw()) { 2071 return false; // Implementation limitation 2072 } 2073 return true; 2074 2075 case Op_MulVL: 2076 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2077 if (!VM_Version::supports_avx512dq()) { 2078 return false; // Implementation limitation 2079 } 2080 return true; 2081 2082 case Op_AndV: 2083 case Op_OrV: 2084 case Op_XorV: 2085 case Op_RotateRightV: 2086 case Op_RotateLeftV: 2087 if (bt != T_INT && bt != T_LONG) { 2088 return false; // Implementation limitation 2089 } 2090 return true; 2091 2092 case Op_VectorLoadMask: 2093 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2094 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2095 return false; 2096 } 2097 return true; 2098 2099 case Op_AddVI: 2100 case Op_AddVL: 2101 case Op_AddVF: 2102 case Op_AddVD: 2103 case Op_SubVI: 2104 case Op_SubVL: 2105 case Op_SubVF: 2106 case Op_SubVD: 2107 case Op_MulVI: 2108 case Op_MulVF: 2109 case Op_MulVD: 2110 case Op_DivVF: 2111 case Op_DivVD: 2112 case Op_SqrtVF: 2113 case Op_SqrtVD: 2114 case Op_LShiftVI: 2115 case Op_LShiftVL: 2116 case Op_RShiftVI: 2117 case Op_RShiftVL: 2118 case Op_URShiftVI: 2119 case Op_URShiftVL: 2120 case Op_LoadVectorMasked: 2121 case Op_StoreVectorMasked: 2122 case Op_LoadVectorGatherMasked: 2123 case Op_StoreVectorScatterMasked: 2124 return true; 2125 2126 case Op_MaxV: 2127 case Op_MinV: 2128 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2129 return false; // Implementation limitation 2130 } 2131 if (is_floating_point_type(bt)) { 2132 return false; // Implementation limitation 2133 } 2134 return true; 2135 2136 case 
Op_VectorMaskCmp: 2137 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2138 return false; // Implementation limitation 2139 } 2140 return true; 2141 2142 case Op_VectorRearrange: 2143 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2144 return false; // Implementation limitation 2145 } 2146 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2147 return false; // Implementation limitation 2148 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2149 return false; // Implementation limitation 2150 } 2151 return true; 2152 2153 // Binary Logical operations 2154 case Op_AndVMask: 2155 case Op_OrVMask: 2156 case Op_XorVMask: 2157 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2158 return false; // Implementation limitation 2159 } 2160 return true; 2161 2162 case Op_PopCountVI: 2163 case Op_PopCountVL: 2164 if (!is_pop_count_instr_target(bt)) { 2165 return false; 2166 } 2167 return true; 2168 2169 case Op_MaskAll: 2170 return true; 2171 2172 case Op_CountLeadingZerosV: 2173 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2174 return true; 2175 } 2176 default: 2177 return false; 2178 } 2179 } 2180 2181 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2182 return false; 2183 } 2184 2185 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2186 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2187 bool legacy = (generic_opnd->opcode() == LEGVEC); 2188 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2189 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2190 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2191 return new legVecZOper(); 2192 } 2193 if (legacy) { 2194 switch (ideal_reg) { 2195 case Op_VecS: return new legVecSOper(); 2196 case Op_VecD: return new legVecDOper(); 2197 case Op_VecX: return new legVecXOper(); 2198 case Op_VecY: return new legVecYOper(); 2199 case Op_VecZ: return new legVecZOper(); 2200 } 2201 } else { 2202 switch (ideal_reg) { 2203 case Op_VecS: return new vecSOper(); 2204 case Op_VecD: return new vecDOper(); 2205 case Op_VecX: return new vecXOper(); 2206 case Op_VecY: return new vecYOper(); 2207 case Op_VecZ: return new vecZOper(); 2208 } 2209 } 2210 ShouldNotReachHere(); 2211 return nullptr; 2212 } 2213 2214 bool Matcher::is_reg2reg_move(MachNode* m) { 2215 switch (m->rule()) { 2216 case MoveVec2Leg_rule: 2217 case MoveLeg2Vec_rule: 2218 case MoveF2VL_rule: 2219 case MoveF2LEG_rule: 2220 case MoveVL2F_rule: 2221 case MoveLEG2F_rule: 2222 case MoveD2VL_rule: 2223 case MoveD2LEG_rule: 2224 case MoveVL2D_rule: 2225 case MoveLEG2D_rule: 2226 return true; 2227 default: 2228 return false; 2229 } 2230 } 2231 2232 bool Matcher::is_generic_vector(MachOper* opnd) { 2233 switch (opnd->opcode()) { 2234 case VEC: 2235 case LEGVEC: 2236 return true; 2237 default: 2238 return false; 2239 } 2240 } 2241 2242 //------------------------------------------------------------------------ 2243 2244 const RegMask* Matcher::predicate_reg_mask(void) { 2245 return &_VECTMASK_REG_mask; 2246 } 2247 2248 // Max vector size in bytes. 0 if not supported. 2249 int Matcher::vector_width_in_bytes(BasicType bt) { 2250 assert(is_java_primitive(bt), "only primitive type vectors"); 2251 if (UseSSE < 2) return 0; 2252 // SSE2 supports 128bit vectors for all types. 2253 // AVX2 supports 256bit vectors for all types. 2254 // AVX2/EVEX supports 512bit vectors for all types. 
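  // Worked example for the default computed below: UseAVX == 2 gives
  // (1 << 2) * 8 = 32 bytes (256-bit vectors) and UseAVX == 3 gives 64 bytes
  // (512-bit); the per-type checks that follow then refine this default.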
2255 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2256 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2257 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2258 size = (UseAVX > 2) ? 64 : 32; 2259 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2260 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2261 // Use flag to limit vector size. 2262 size = MIN2(size,(int)MaxVectorSize); 2263 // Minimum 2 values in vector (or 4 for bytes). 2264 switch (bt) { 2265 case T_DOUBLE: 2266 case T_LONG: 2267 if (size < 16) return 0; 2268 break; 2269 case T_FLOAT: 2270 case T_INT: 2271 if (size < 8) return 0; 2272 break; 2273 case T_BOOLEAN: 2274 if (size < 4) return 0; 2275 break; 2276 case T_CHAR: 2277 if (size < 4) return 0; 2278 break; 2279 case T_BYTE: 2280 if (size < 4) return 0; 2281 break; 2282 case T_SHORT: 2283 if (size < 4) return 0; 2284 break; 2285 default: 2286 ShouldNotReachHere(); 2287 } 2288 return size; 2289 } 2290 2291 // Limits on vector size (number of elements) loaded into vector. 2292 int Matcher::max_vector_size(const BasicType bt) { 2293 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2294 } 2295 int Matcher::min_vector_size(const BasicType bt) { 2296 int max_size = max_vector_size(bt); 2297 // Min size which can be loaded into vector is 4 bytes. 2298 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2299 // Support for calling svml double64 vectors 2300 if (bt == T_DOUBLE) { 2301 size = 1; 2302 } 2303 return MIN2(size,max_size); 2304 } 2305 2306 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2307 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2308 // by default on Cascade Lake 2309 if (VM_Version::is_default_intel_cascade_lake()) { 2310 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2311 } 2312 return Matcher::max_vector_size(bt); 2313 } 2314 2315 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2316 return -1; 2317 } 2318 2319 // Vector ideal reg corresponding to specified size in bytes 2320 uint Matcher::vector_ideal_reg(int size) { 2321 assert(MaxVectorSize >= size, ""); 2322 switch(size) { 2323 case 4: return Op_VecS; 2324 case 8: return Op_VecD; 2325 case 16: return Op_VecX; 2326 case 32: return Op_VecY; 2327 case 64: return Op_VecZ; 2328 } 2329 ShouldNotReachHere(); 2330 return 0; 2331 } 2332 2333 // Check for shift by small constant as well 2334 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2335 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2336 shift->in(2)->get_int() <= 3 && 2337 // Are there other uses besides address expressions? 2338 !matcher->is_visited(shift)) { 2339 address_visited.set(shift->_idx); // Flag as address_visited 2340 mstack.push(shift->in(2), Matcher::Visit); 2341 Node *conv = shift->in(1); 2342 #ifdef _LP64 2343 // Allow Matcher to match the rule which bypass 2344 // ConvI2L operation for an array index on LP64 2345 // if the index value is positive. 2346 if (conv->Opcode() == Op_ConvI2L && 2347 conv->as_Type()->type()->is_long()->_lo >= 0 && 2348 // Are there other uses besides address expressions? 
2349 !matcher->is_visited(conv)) { 2350 address_visited.set(conv->_idx); // Flag as address_visited 2351 mstack.push(conv->in(1), Matcher::Pre_Visit); 2352 } else 2353 #endif 2354 mstack.push(conv, Matcher::Pre_Visit); 2355 return true; 2356 } 2357 return false; 2358 } 2359 2360 // This function identifies sub-graphs in which a 'load' node is 2361 // input to two different nodes, and such that it can be matched 2362 // with BMI instructions like blsi, blsr, etc. 2363 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2364 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2365 // refers to the same node. 2366 // 2367 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2368 // This is a temporary solution until we make DAGs expressible in ADL. 2369 template<typename ConType> 2370 class FusedPatternMatcher { 2371 Node* _op1_node; 2372 Node* _mop_node; 2373 int _con_op; 2374 2375 static int match_next(Node* n, int next_op, int next_op_idx) { 2376 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2377 return -1; 2378 } 2379 2380 if (next_op_idx == -1) { // n is commutative, try rotations 2381 if (n->in(1)->Opcode() == next_op) { 2382 return 1; 2383 } else if (n->in(2)->Opcode() == next_op) { 2384 return 2; 2385 } 2386 } else { 2387 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2388 if (n->in(next_op_idx)->Opcode() == next_op) { 2389 return next_op_idx; 2390 } 2391 } 2392 return -1; 2393 } 2394 2395 public: 2396 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2397 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2398 2399 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2400 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2401 typename ConType::NativeType con_value) { 2402 if (_op1_node->Opcode() != op1) { 2403 return false; 2404 } 2405 if (_mop_node->outcnt() > 2) { 2406 return false; 2407 } 2408 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2409 if (op1_op2_idx == -1) { 2410 return false; 2411 } 2412 // Memory operation must be the other edge 2413 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2414 2415 // Check that the mop node is really what we want 2416 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2417 Node* op2_node = _op1_node->in(op1_op2_idx); 2418 if (op2_node->outcnt() > 1) { 2419 return false; 2420 } 2421 assert(op2_node->Opcode() == op2, "Should be"); 2422 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2423 if (op2_con_idx == -1) { 2424 return false; 2425 } 2426 // Memory operation must be the other edge 2427 int op2_mop_idx = (op2_con_idx & 1) + 1; 2428 // Check that the memory operation is the same node 2429 if (op2_node->in(op2_mop_idx) == _mop_node) { 2430 // Now check the constant 2431 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2432 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2433 return true; 2434 } 2435 } 2436 } 2437 return false; 2438 } 2439 }; 2440 2441 static bool is_bmi_pattern(Node* n, Node* m) { 2442 assert(UseBMI1Instructions, "sanity"); 2443 if (n != nullptr && m != nullptr) { 2444 if (m->Opcode() == Op_LoadI) { 2445 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2446 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2447 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2448 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2449 } else if (m->Opcode() == Op_LoadL) { 2450 
FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2451 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2452 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2453 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2454 } 2455 } 2456 return false; 2457 } 2458 2459 // Should the matcher clone input 'm' of node 'n'? 2460 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2461 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 2462 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2463 mstack.push(m, Visit); 2464 return true; 2465 } 2466 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2467 mstack.push(m, Visit); // m = ShiftCntV 2468 return true; 2469 } 2470 if (is_encode_and_store_pattern(n, m)) { 2471 mstack.push(m, Visit); 2472 return true; 2473 } 2474 return false; 2475 } 2476 2477 // Should the Matcher clone shifts on addressing modes, expecting them 2478 // to be subsumed into complex addressing expressions or compute them 2479 // into registers? 2480 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2481 Node *off = m->in(AddPNode::Offset); 2482 if (off->is_Con()) { 2483 address_visited.test_set(m->_idx); // Flag as address_visited 2484 Node *adr = m->in(AddPNode::Address); 2485 2486 // Intel can handle 2 adds in addressing mode 2487 // AtomicAdd is not an addressing expression. 2488 // Cheap to find it by looking for screwy base. 2489 if (adr->is_AddP() && 2490 !adr->in(AddPNode::Base)->is_top() && 2491 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2492 // Are there other uses besides address expressions? 2493 !is_visited(adr)) { 2494 address_visited.set(adr->_idx); // Flag as address_visited 2495 Node *shift = adr->in(AddPNode::Offset); 2496 if (!clone_shift(shift, this, mstack, address_visited)) { 2497 mstack.push(shift, Pre_Visit); 2498 } 2499 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2500 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2501 } else { 2502 mstack.push(adr, Pre_Visit); 2503 } 2504 2505 // Clone X+offset as it also folds into most addressing expressions 2506 mstack.push(off, Visit); 2507 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2508 return true; 2509 } else if (clone_shift(off, this, mstack, address_visited)) { 2510 address_visited.test_set(m->_idx); // Flag as address_visited 2511 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2512 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2513 return true; 2514 } 2515 return false; 2516 } 2517 2518 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2519 switch (bt) { 2520 case BoolTest::eq: 2521 return Assembler::eq; 2522 case BoolTest::ne: 2523 return Assembler::neq; 2524 case BoolTest::le: 2525 case BoolTest::ule: 2526 return Assembler::le; 2527 case BoolTest::ge: 2528 case BoolTest::uge: 2529 return Assembler::nlt; 2530 case BoolTest::lt: 2531 case BoolTest::ult: 2532 return Assembler::lt; 2533 case BoolTest::gt: 2534 case BoolTest::ugt: 2535 return Assembler::nle; 2536 default : ShouldNotReachHere(); return Assembler::_false; 2537 } 2538 } 2539 2540 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2541 switch (bt) { 2542 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2543 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 
2544 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2545 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2546 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2547 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2548 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2549 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2550 } 2551 } 2552 2553 // Helper methods for MachSpillCopyNode::implementation(). 2554 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2555 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2556 assert(ireg == Op_VecS || // 32bit vector 2557 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2558 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2559 "no non-adjacent vector moves" ); 2560 if (masm) { 2561 switch (ireg) { 2562 case Op_VecS: // copy whole register 2563 case Op_VecD: 2564 case Op_VecX: 2565 #ifndef _LP64 2566 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2567 #else 2568 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2569 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2570 } else { 2571 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2572 } 2573 #endif 2574 break; 2575 case Op_VecY: 2576 #ifndef _LP64 2577 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2578 #else 2579 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2580 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2581 } else { 2582 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2583 } 2584 #endif 2585 break; 2586 case Op_VecZ: 2587 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2588 break; 2589 default: 2590 ShouldNotReachHere(); 2591 } 2592 #ifndef PRODUCT 2593 } else { 2594 switch (ireg) { 2595 case Op_VecS: 2596 case Op_VecD: 2597 case Op_VecX: 2598 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2599 break; 2600 case Op_VecY: 2601 case Op_VecZ: 2602 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2603 break; 2604 default: 2605 ShouldNotReachHere(); 2606 } 2607 #endif 2608 } 2609 } 2610 2611 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2612 int stack_offset, int reg, uint ireg, outputStream* st) { 2613 if (masm) { 2614 if (is_load) { 2615 switch (ireg) { 2616 case Op_VecS: 2617 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2618 break; 2619 case Op_VecD: 2620 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2621 break; 2622 case Op_VecX: 2623 #ifndef _LP64 2624 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2625 #else 2626 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2627 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2628 } else { 2629 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2630 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2631 } 2632 
#endif 2633 break; 2634 case Op_VecY: 2635 #ifndef _LP64 2636 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2637 #else 2638 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2639 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2640 } else { 2641 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2642 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2643 } 2644 #endif 2645 break; 2646 case Op_VecZ: 2647 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2648 break; 2649 default: 2650 ShouldNotReachHere(); 2651 } 2652 } else { // store 2653 switch (ireg) { 2654 case Op_VecS: 2655 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2656 break; 2657 case Op_VecD: 2658 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2659 break; 2660 case Op_VecX: 2661 #ifndef _LP64 2662 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2663 #else 2664 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2665 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2666 } 2667 else { 2668 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2669 } 2670 #endif 2671 break; 2672 case Op_VecY: 2673 #ifndef _LP64 2674 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2675 #else 2676 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2677 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2678 } 2679 else { 2680 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2681 } 2682 #endif 2683 break; 2684 case Op_VecZ: 2685 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2686 break; 2687 default: 2688 ShouldNotReachHere(); 2689 } 2690 } 2691 #ifndef PRODUCT 2692 } else { 2693 if (is_load) { 2694 switch (ireg) { 2695 case Op_VecS: 2696 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2697 break; 2698 case Op_VecD: 2699 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2700 break; 2701 case Op_VecX: 2702 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2703 break; 2704 case Op_VecY: 2705 case Op_VecZ: 2706 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2707 break; 2708 default: 2709 ShouldNotReachHere(); 2710 } 2711 } else { // store 2712 switch (ireg) { 2713 case Op_VecS: 2714 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2715 break; 2716 case Op_VecD: 2717 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2718 break; 2719 case Op_VecX: 2720 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2721 break; 2722 case Op_VecY: 2723 case Op_VecZ: 2724 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2725 break; 2726 default: 2727 ShouldNotReachHere(); 2728 } 2729 } 2730 #endif 2731 } 2732 } 2733 2734 template <class T> 2735 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2736 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2737 jvalue ele; 2738 switch (bt) { 2739 
case T_BYTE: ele.b = con; break; 2740 case T_SHORT: ele.s = con; break; 2741 case T_INT: ele.i = con; break; 2742 case T_LONG: ele.j = con; break; 2743 case T_FLOAT: ele.f = con; break; 2744 case T_DOUBLE: ele.d = con; break; 2745 default: ShouldNotReachHere(); 2746 } 2747 for (int i = 0; i < len; i++) { 2748 val->append(ele); 2749 } 2750 return val; 2751 } 2752 2753 static inline jlong high_bit_set(BasicType bt) { 2754 switch (bt) { 2755 case T_BYTE: return 0x8080808080808080; 2756 case T_SHORT: return 0x8000800080008000; 2757 case T_INT: return 0x8000000080000000; 2758 case T_LONG: return 0x8000000000000000; 2759 default: 2760 ShouldNotReachHere(); 2761 return 0; 2762 } 2763 } 2764 2765 #ifndef PRODUCT 2766 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2767 st->print("nop \t# %d bytes pad for loops and calls", _count); 2768 } 2769 #endif 2770 2771 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2772 __ nop(_count); 2773 } 2774 2775 uint MachNopNode::size(PhaseRegAlloc*) const { 2776 return _count; 2777 } 2778 2779 #ifndef PRODUCT 2780 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2781 st->print("# breakpoint"); 2782 } 2783 #endif 2784 2785 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2786 __ int3(); 2787 } 2788 2789 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2790 return MachNode::size(ra_); 2791 } 2792 2793 %} 2794 2795 encode %{ 2796 2797 enc_class call_epilog %{ 2798 if (VerifyStackAtCalls) { 2799 // Check that stack depth is unchanged: find majik cookie on stack 2800 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2801 Label L; 2802 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2803 __ jccb(Assembler::equal, L); 2804 // Die if stack mismatch 2805 __ int3(); 2806 __ bind(L); 2807 } 2808 %} 2809 2810 %} 2811 2812 // Operands for bound floating pointer register arguments 2813 operand rxmm0() %{ 2814 constraint(ALLOC_IN_RC(xmm0_reg)); 2815 match(VecX); 2816 format%{%} 2817 interface(REG_INTER); 2818 %} 2819 2820 //----------OPERANDS----------------------------------------------------------- 2821 // Operand definitions must precede instruction definitions for correct parsing 2822 // in the ADLC because operands constitute user defined types which are used in 2823 // instruction definitions. 2824 2825 // Vectors 2826 2827 // Dummy generic vector class. Should be used for all vector operands. 2828 // Replaced with vec[SDXYZ] during post-selection pass. 2829 operand vec() %{ 2830 constraint(ALLOC_IN_RC(dynamic)); 2831 match(VecX); 2832 match(VecY); 2833 match(VecZ); 2834 match(VecS); 2835 match(VecD); 2836 2837 format %{ %} 2838 interface(REG_INTER); 2839 %} 2840 2841 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2842 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2843 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2844 // runtime code generation via reg_class_dynamic. 2845 operand legVec() %{ 2846 constraint(ALLOC_IN_RC(dynamic)); 2847 match(VecX); 2848 match(VecY); 2849 match(VecZ); 2850 match(VecS); 2851 match(VecD); 2852 2853 format %{ %} 2854 interface(REG_INTER); 2855 %} 2856 2857 // Replaces vec during post-selection cleanup. See above. 
2858 operand vecS() %{ 2859 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2860 match(VecS); 2861 2862 format %{ %} 2863 interface(REG_INTER); 2864 %} 2865 2866 // Replaces legVec during post-selection cleanup. See above. 2867 operand legVecS() %{ 2868 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2869 match(VecS); 2870 2871 format %{ %} 2872 interface(REG_INTER); 2873 %} 2874 2875 // Replaces vec during post-selection cleanup. See above. 2876 operand vecD() %{ 2877 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2878 match(VecD); 2879 2880 format %{ %} 2881 interface(REG_INTER); 2882 %} 2883 2884 // Replaces legVec during post-selection cleanup. See above. 2885 operand legVecD() %{ 2886 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2887 match(VecD); 2888 2889 format %{ %} 2890 interface(REG_INTER); 2891 %} 2892 2893 // Replaces vec during post-selection cleanup. See above. 2894 operand vecX() %{ 2895 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2896 match(VecX); 2897 2898 format %{ %} 2899 interface(REG_INTER); 2900 %} 2901 2902 // Replaces legVec during post-selection cleanup. See above. 2903 operand legVecX() %{ 2904 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2905 match(VecX); 2906 2907 format %{ %} 2908 interface(REG_INTER); 2909 %} 2910 2911 // Replaces vec during post-selection cleanup. See above. 2912 operand vecY() %{ 2913 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2914 match(VecY); 2915 2916 format %{ %} 2917 interface(REG_INTER); 2918 %} 2919 2920 // Replaces legVec during post-selection cleanup. See above. 2921 operand legVecY() %{ 2922 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2923 match(VecY); 2924 2925 format %{ %} 2926 interface(REG_INTER); 2927 %} 2928 2929 // Replaces vec during post-selection cleanup. See above. 2930 operand vecZ() %{ 2931 constraint(ALLOC_IN_RC(vectorz_reg)); 2932 match(VecZ); 2933 2934 format %{ %} 2935 interface(REG_INTER); 2936 %} 2937 2938 // Replaces legVec during post-selection cleanup. See above. 
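// Note (an assumption based on the register class names): the *_legacy classes are
// limited to registers that can be encoded without EVEX (XMM0-XMM15), which is why
// instructions that may be emitted with pre-AVX512 encodings use the legVec* operands.
// legVecZ below is the 64-byte flavor of that family.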
2939 operand legVecZ() %{ 2940 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2941 match(VecZ); 2942 2943 format %{ %} 2944 interface(REG_INTER); 2945 %} 2946 2947 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2948 2949 // ============================================================================ 2950 2951 instruct ShouldNotReachHere() %{ 2952 match(Halt); 2953 format %{ "stop\t# ShouldNotReachHere" %} 2954 ins_encode %{ 2955 if (is_reachable()) { 2956 __ stop(_halt_reason); 2957 } 2958 %} 2959 ins_pipe(pipe_slow); 2960 %} 2961 2962 // ============================================================================ 2963 2964 instruct addF_reg(regF dst, regF src) %{ 2965 predicate((UseSSE>=1) && (UseAVX == 0)); 2966 match(Set dst (AddF dst src)); 2967 2968 format %{ "addss $dst, $src" %} 2969 ins_cost(150); 2970 ins_encode %{ 2971 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2972 %} 2973 ins_pipe(pipe_slow); 2974 %} 2975 2976 instruct addF_mem(regF dst, memory src) %{ 2977 predicate((UseSSE>=1) && (UseAVX == 0)); 2978 match(Set dst (AddF dst (LoadF src))); 2979 2980 format %{ "addss $dst, $src" %} 2981 ins_cost(150); 2982 ins_encode %{ 2983 __ addss($dst$$XMMRegister, $src$$Address); 2984 %} 2985 ins_pipe(pipe_slow); 2986 %} 2987 2988 instruct addF_imm(regF dst, immF con) %{ 2989 predicate((UseSSE>=1) && (UseAVX == 0)); 2990 match(Set dst (AddF dst con)); 2991 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2992 ins_cost(150); 2993 ins_encode %{ 2994 __ addss($dst$$XMMRegister, $constantaddress($con)); 2995 %} 2996 ins_pipe(pipe_slow); 2997 %} 2998 2999 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3000 predicate(UseAVX > 0); 3001 match(Set dst (AddF src1 src2)); 3002 3003 format %{ "vaddss $dst, $src1, $src2" %} 3004 ins_cost(150); 3005 ins_encode %{ 3006 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3007 %} 3008 ins_pipe(pipe_slow); 3009 %} 3010 3011 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3012 predicate(UseAVX > 0); 3013 match(Set dst (AddF src1 (LoadF src2))); 3014 3015 format %{ "vaddss $dst, $src1, $src2" %} 3016 ins_cost(150); 3017 ins_encode %{ 3018 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3019 %} 3020 ins_pipe(pipe_slow); 3021 %} 3022 3023 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3024 predicate(UseAVX > 0); 3025 match(Set dst (AddF src con)); 3026 3027 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3028 ins_cost(150); 3029 ins_encode %{ 3030 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3031 %} 3032 ins_pipe(pipe_slow); 3033 %} 3034 3035 instruct addD_reg(regD dst, regD src) %{ 3036 predicate((UseSSE>=2) && (UseAVX == 0)); 3037 match(Set dst (AddD dst src)); 3038 3039 format %{ "addsd $dst, $src" %} 3040 ins_cost(150); 3041 ins_encode %{ 3042 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3043 %} 3044 ins_pipe(pipe_slow); 3045 %} 3046 3047 instruct addD_mem(regD dst, memory src) %{ 3048 predicate((UseSSE>=2) && (UseAVX == 0)); 3049 match(Set dst (AddD dst (LoadD src))); 3050 3051 format %{ "addsd $dst, $src" %} 3052 ins_cost(150); 3053 ins_encode %{ 3054 __ addsd($dst$$XMMRegister, $src$$Address); 3055 %} 3056 ins_pipe(pipe_slow); 3057 %} 3058 3059 instruct addD_imm(regD dst, immD con) %{ 3060 predicate((UseSSE>=2) && (UseAVX == 0)); 3061 match(Set dst (AddD dst con)); 3062 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: 
double=$con" %} 3063 ins_cost(150); 3064 ins_encode %{ 3065 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3066 %} 3067 ins_pipe(pipe_slow); 3068 %} 3069 3070 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3071 predicate(UseAVX > 0); 3072 match(Set dst (AddD src1 src2)); 3073 3074 format %{ "vaddsd $dst, $src1, $src2" %} 3075 ins_cost(150); 3076 ins_encode %{ 3077 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3078 %} 3079 ins_pipe(pipe_slow); 3080 %} 3081 3082 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3083 predicate(UseAVX > 0); 3084 match(Set dst (AddD src1 (LoadD src2))); 3085 3086 format %{ "vaddsd $dst, $src1, $src2" %} 3087 ins_cost(150); 3088 ins_encode %{ 3089 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3090 %} 3091 ins_pipe(pipe_slow); 3092 %} 3093 3094 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3095 predicate(UseAVX > 0); 3096 match(Set dst (AddD src con)); 3097 3098 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3099 ins_cost(150); 3100 ins_encode %{ 3101 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3102 %} 3103 ins_pipe(pipe_slow); 3104 %} 3105 3106 instruct subF_reg(regF dst, regF src) %{ 3107 predicate((UseSSE>=1) && (UseAVX == 0)); 3108 match(Set dst (SubF dst src)); 3109 3110 format %{ "subss $dst, $src" %} 3111 ins_cost(150); 3112 ins_encode %{ 3113 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3114 %} 3115 ins_pipe(pipe_slow); 3116 %} 3117 3118 instruct subF_mem(regF dst, memory src) %{ 3119 predicate((UseSSE>=1) && (UseAVX == 0)); 3120 match(Set dst (SubF dst (LoadF src))); 3121 3122 format %{ "subss $dst, $src" %} 3123 ins_cost(150); 3124 ins_encode %{ 3125 __ subss($dst$$XMMRegister, $src$$Address); 3126 %} 3127 ins_pipe(pipe_slow); 3128 %} 3129 3130 instruct subF_imm(regF dst, immF con) %{ 3131 predicate((UseSSE>=1) && (UseAVX == 0)); 3132 match(Set dst (SubF dst con)); 3133 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3134 ins_cost(150); 3135 ins_encode %{ 3136 __ subss($dst$$XMMRegister, $constantaddress($con)); 3137 %} 3138 ins_pipe(pipe_slow); 3139 %} 3140 3141 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3142 predicate(UseAVX > 0); 3143 match(Set dst (SubF src1 src2)); 3144 3145 format %{ "vsubss $dst, $src1, $src2" %} 3146 ins_cost(150); 3147 ins_encode %{ 3148 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3149 %} 3150 ins_pipe(pipe_slow); 3151 %} 3152 3153 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3154 predicate(UseAVX > 0); 3155 match(Set dst (SubF src1 (LoadF src2))); 3156 3157 format %{ "vsubss $dst, $src1, $src2" %} 3158 ins_cost(150); 3159 ins_encode %{ 3160 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3161 %} 3162 ins_pipe(pipe_slow); 3163 %} 3164 3165 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3166 predicate(UseAVX > 0); 3167 match(Set dst (SubF src con)); 3168 3169 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3170 ins_cost(150); 3171 ins_encode %{ 3172 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3173 %} 3174 ins_pipe(pipe_slow); 3175 %} 3176 3177 instruct subD_reg(regD dst, regD src) %{ 3178 predicate((UseSSE>=2) && (UseAVX == 0)); 3179 match(Set dst (SubD dst src)); 3180 3181 format %{ "subsd $dst, $src" %} 3182 ins_cost(150); 3183 ins_encode %{ 3184 __ subsd($dst$$XMMRegister, 
$src$$XMMRegister); 3185 %} 3186 ins_pipe(pipe_slow); 3187 %} 3188 3189 instruct subD_mem(regD dst, memory src) %{ 3190 predicate((UseSSE>=2) && (UseAVX == 0)); 3191 match(Set dst (SubD dst (LoadD src))); 3192 3193 format %{ "subsd $dst, $src" %} 3194 ins_cost(150); 3195 ins_encode %{ 3196 __ subsd($dst$$XMMRegister, $src$$Address); 3197 %} 3198 ins_pipe(pipe_slow); 3199 %} 3200 3201 instruct subD_imm(regD dst, immD con) %{ 3202 predicate((UseSSE>=2) && (UseAVX == 0)); 3203 match(Set dst (SubD dst con)); 3204 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3205 ins_cost(150); 3206 ins_encode %{ 3207 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3208 %} 3209 ins_pipe(pipe_slow); 3210 %} 3211 3212 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3213 predicate(UseAVX > 0); 3214 match(Set dst (SubD src1 src2)); 3215 3216 format %{ "vsubsd $dst, $src1, $src2" %} 3217 ins_cost(150); 3218 ins_encode %{ 3219 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3220 %} 3221 ins_pipe(pipe_slow); 3222 %} 3223 3224 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3225 predicate(UseAVX > 0); 3226 match(Set dst (SubD src1 (LoadD src2))); 3227 3228 format %{ "vsubsd $dst, $src1, $src2" %} 3229 ins_cost(150); 3230 ins_encode %{ 3231 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3232 %} 3233 ins_pipe(pipe_slow); 3234 %} 3235 3236 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3237 predicate(UseAVX > 0); 3238 match(Set dst (SubD src con)); 3239 3240 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3241 ins_cost(150); 3242 ins_encode %{ 3243 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3244 %} 3245 ins_pipe(pipe_slow); 3246 %} 3247 3248 instruct mulF_reg(regF dst, regF src) %{ 3249 predicate((UseSSE>=1) && (UseAVX == 0)); 3250 match(Set dst (MulF dst src)); 3251 3252 format %{ "mulss $dst, $src" %} 3253 ins_cost(150); 3254 ins_encode %{ 3255 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3256 %} 3257 ins_pipe(pipe_slow); 3258 %} 3259 3260 instruct mulF_mem(regF dst, memory src) %{ 3261 predicate((UseSSE>=1) && (UseAVX == 0)); 3262 match(Set dst (MulF dst (LoadF src))); 3263 3264 format %{ "mulss $dst, $src" %} 3265 ins_cost(150); 3266 ins_encode %{ 3267 __ mulss($dst$$XMMRegister, $src$$Address); 3268 %} 3269 ins_pipe(pipe_slow); 3270 %} 3271 3272 instruct mulF_imm(regF dst, immF con) %{ 3273 predicate((UseSSE>=1) && (UseAVX == 0)); 3274 match(Set dst (MulF dst con)); 3275 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3276 ins_cost(150); 3277 ins_encode %{ 3278 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3279 %} 3280 ins_pipe(pipe_slow); 3281 %} 3282 3283 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3284 predicate(UseAVX > 0); 3285 match(Set dst (MulF src1 src2)); 3286 3287 format %{ "vmulss $dst, $src1, $src2" %} 3288 ins_cost(150); 3289 ins_encode %{ 3290 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3291 %} 3292 ins_pipe(pipe_slow); 3293 %} 3294 3295 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3296 predicate(UseAVX > 0); 3297 match(Set dst (MulF src1 (LoadF src2))); 3298 3299 format %{ "vmulss $dst, $src1, $src2" %} 3300 ins_cost(150); 3301 ins_encode %{ 3302 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3303 %} 3304 ins_pipe(pipe_slow); 3305 %} 3306 3307 instruct mulF_reg_imm(regF dst, regF src, immF con) 
%{ 3308 predicate(UseAVX > 0); 3309 match(Set dst (MulF src con)); 3310 3311 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3312 ins_cost(150); 3313 ins_encode %{ 3314 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3315 %} 3316 ins_pipe(pipe_slow); 3317 %} 3318 3319 instruct mulD_reg(regD dst, regD src) %{ 3320 predicate((UseSSE>=2) && (UseAVX == 0)); 3321 match(Set dst (MulD dst src)); 3322 3323 format %{ "mulsd $dst, $src" %} 3324 ins_cost(150); 3325 ins_encode %{ 3326 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3327 %} 3328 ins_pipe(pipe_slow); 3329 %} 3330 3331 instruct mulD_mem(regD dst, memory src) %{ 3332 predicate((UseSSE>=2) && (UseAVX == 0)); 3333 match(Set dst (MulD dst (LoadD src))); 3334 3335 format %{ "mulsd $dst, $src" %} 3336 ins_cost(150); 3337 ins_encode %{ 3338 __ mulsd($dst$$XMMRegister, $src$$Address); 3339 %} 3340 ins_pipe(pipe_slow); 3341 %} 3342 3343 instruct mulD_imm(regD dst, immD con) %{ 3344 predicate((UseSSE>=2) && (UseAVX == 0)); 3345 match(Set dst (MulD dst con)); 3346 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3347 ins_cost(150); 3348 ins_encode %{ 3349 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3350 %} 3351 ins_pipe(pipe_slow); 3352 %} 3353 3354 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3355 predicate(UseAVX > 0); 3356 match(Set dst (MulD src1 src2)); 3357 3358 format %{ "vmulsd $dst, $src1, $src2" %} 3359 ins_cost(150); 3360 ins_encode %{ 3361 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3362 %} 3363 ins_pipe(pipe_slow); 3364 %} 3365 3366 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3367 predicate(UseAVX > 0); 3368 match(Set dst (MulD src1 (LoadD src2))); 3369 3370 format %{ "vmulsd $dst, $src1, $src2" %} 3371 ins_cost(150); 3372 ins_encode %{ 3373 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3374 %} 3375 ins_pipe(pipe_slow); 3376 %} 3377 3378 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3379 predicate(UseAVX > 0); 3380 match(Set dst (MulD src con)); 3381 3382 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3383 ins_cost(150); 3384 ins_encode %{ 3385 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3386 %} 3387 ins_pipe(pipe_slow); 3388 %} 3389 3390 instruct divF_reg(regF dst, regF src) %{ 3391 predicate((UseSSE>=1) && (UseAVX == 0)); 3392 match(Set dst (DivF dst src)); 3393 3394 format %{ "divss $dst, $src" %} 3395 ins_cost(150); 3396 ins_encode %{ 3397 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3398 %} 3399 ins_pipe(pipe_slow); 3400 %} 3401 3402 instruct divF_mem(regF dst, memory src) %{ 3403 predicate((UseSSE>=1) && (UseAVX == 0)); 3404 match(Set dst (DivF dst (LoadF src))); 3405 3406 format %{ "divss $dst, $src" %} 3407 ins_cost(150); 3408 ins_encode %{ 3409 __ divss($dst$$XMMRegister, $src$$Address); 3410 %} 3411 ins_pipe(pipe_slow); 3412 %} 3413 3414 instruct divF_imm(regF dst, immF con) %{ 3415 predicate((UseSSE>=1) && (UseAVX == 0)); 3416 match(Set dst (DivF dst con)); 3417 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3418 ins_cost(150); 3419 ins_encode %{ 3420 __ divss($dst$$XMMRegister, $constantaddress($con)); 3421 %} 3422 ins_pipe(pipe_slow); 3423 %} 3424 3425 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3426 predicate(UseAVX > 0); 3427 match(Set dst (DivF src1 src2)); 3428 3429 format %{ "vdivss $dst, 
$src1, $src2" %} 3430 ins_cost(150); 3431 ins_encode %{ 3432 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3433 %} 3434 ins_pipe(pipe_slow); 3435 %} 3436 3437 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3438 predicate(UseAVX > 0); 3439 match(Set dst (DivF src1 (LoadF src2))); 3440 3441 format %{ "vdivss $dst, $src1, $src2" %} 3442 ins_cost(150); 3443 ins_encode %{ 3444 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3445 %} 3446 ins_pipe(pipe_slow); 3447 %} 3448 3449 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3450 predicate(UseAVX > 0); 3451 match(Set dst (DivF src con)); 3452 3453 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3454 ins_cost(150); 3455 ins_encode %{ 3456 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3457 %} 3458 ins_pipe(pipe_slow); 3459 %} 3460 3461 instruct divD_reg(regD dst, regD src) %{ 3462 predicate((UseSSE>=2) && (UseAVX == 0)); 3463 match(Set dst (DivD dst src)); 3464 3465 format %{ "divsd $dst, $src" %} 3466 ins_cost(150); 3467 ins_encode %{ 3468 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3469 %} 3470 ins_pipe(pipe_slow); 3471 %} 3472 3473 instruct divD_mem(regD dst, memory src) %{ 3474 predicate((UseSSE>=2) && (UseAVX == 0)); 3475 match(Set dst (DivD dst (LoadD src))); 3476 3477 format %{ "divsd $dst, $src" %} 3478 ins_cost(150); 3479 ins_encode %{ 3480 __ divsd($dst$$XMMRegister, $src$$Address); 3481 %} 3482 ins_pipe(pipe_slow); 3483 %} 3484 3485 instruct divD_imm(regD dst, immD con) %{ 3486 predicate((UseSSE>=2) && (UseAVX == 0)); 3487 match(Set dst (DivD dst con)); 3488 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3489 ins_cost(150); 3490 ins_encode %{ 3491 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3492 %} 3493 ins_pipe(pipe_slow); 3494 %} 3495 3496 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3497 predicate(UseAVX > 0); 3498 match(Set dst (DivD src1 src2)); 3499 3500 format %{ "vdivsd $dst, $src1, $src2" %} 3501 ins_cost(150); 3502 ins_encode %{ 3503 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3504 %} 3505 ins_pipe(pipe_slow); 3506 %} 3507 3508 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3509 predicate(UseAVX > 0); 3510 match(Set dst (DivD src1 (LoadD src2))); 3511 3512 format %{ "vdivsd $dst, $src1, $src2" %} 3513 ins_cost(150); 3514 ins_encode %{ 3515 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3516 %} 3517 ins_pipe(pipe_slow); 3518 %} 3519 3520 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3521 predicate(UseAVX > 0); 3522 match(Set dst (DivD src con)); 3523 3524 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3525 ins_cost(150); 3526 ins_encode %{ 3527 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3528 %} 3529 ins_pipe(pipe_slow); 3530 %} 3531 3532 instruct absF_reg(regF dst) %{ 3533 predicate((UseSSE>=1) && (UseAVX == 0)); 3534 match(Set dst (AbsF dst)); 3535 ins_cost(150); 3536 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3537 ins_encode %{ 3538 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3539 %} 3540 ins_pipe(pipe_slow); 3541 %} 3542 3543 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3544 predicate(UseAVX > 0); 3545 match(Set dst (AbsF src)); 3546 ins_cost(150); 3547 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3548 ins_encode 
%{ 3549 int vlen_enc = Assembler::AVX_128bit; 3550 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3551 ExternalAddress(float_signmask()), vlen_enc); 3552 %} 3553 ins_pipe(pipe_slow); 3554 %} 3555 3556 instruct absD_reg(regD dst) %{ 3557 predicate((UseSSE>=2) && (UseAVX == 0)); 3558 match(Set dst (AbsD dst)); 3559 ins_cost(150); 3560 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3561 "# abs double by sign masking" %} 3562 ins_encode %{ 3563 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3564 %} 3565 ins_pipe(pipe_slow); 3566 %} 3567 3568 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3569 predicate(UseAVX > 0); 3570 match(Set dst (AbsD src)); 3571 ins_cost(150); 3572 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3573 "# abs double by sign masking" %} 3574 ins_encode %{ 3575 int vlen_enc = Assembler::AVX_128bit; 3576 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3577 ExternalAddress(double_signmask()), vlen_enc); 3578 %} 3579 ins_pipe(pipe_slow); 3580 %} 3581 3582 instruct negF_reg(regF dst) %{ 3583 predicate((UseSSE>=1) && (UseAVX == 0)); 3584 match(Set dst (NegF dst)); 3585 ins_cost(150); 3586 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3587 ins_encode %{ 3588 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3589 %} 3590 ins_pipe(pipe_slow); 3591 %} 3592 3593 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3594 predicate(UseAVX > 0); 3595 match(Set dst (NegF src)); 3596 ins_cost(150); 3597 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3598 ins_encode %{ 3599 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3600 ExternalAddress(float_signflip())); 3601 %} 3602 ins_pipe(pipe_slow); 3603 %} 3604 3605 instruct negD_reg(regD dst) %{ 3606 predicate((UseSSE>=2) && (UseAVX == 0)); 3607 match(Set dst (NegD dst)); 3608 ins_cost(150); 3609 format %{ "xorpd $dst, [0x8000000000000000]\t" 3610 "# neg double by sign flipping" %} 3611 ins_encode %{ 3612 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3613 %} 3614 ins_pipe(pipe_slow); 3615 %} 3616 3617 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3618 predicate(UseAVX > 0); 3619 match(Set dst (NegD src)); 3620 ins_cost(150); 3621 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3622 "# neg double by sign flipping" %} 3623 ins_encode %{ 3624 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3625 ExternalAddress(double_signflip())); 3626 %} 3627 ins_pipe(pipe_slow); 3628 %} 3629 3630 // sqrtss instruction needs destination register to be pre initialized for best performance 3631 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3632 instruct sqrtF_reg(regF dst) %{ 3633 predicate(UseSSE>=1); 3634 match(Set dst (SqrtF dst)); 3635 format %{ "sqrtss $dst, $dst" %} 3636 ins_encode %{ 3637 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3638 %} 3639 ins_pipe(pipe_slow); 3640 %} 3641 3642 // sqrtsd instruction needs destination register to be pre initialized for best performance 3643 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3644 instruct sqrtD_reg(regD dst) %{ 3645 predicate(UseSSE>=2); 3646 match(Set dst (SqrtD dst)); 3647 format %{ "sqrtsd $dst, $dst" %} 3648 ins_encode %{ 3649 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3650 %} 3651 ins_pipe(pipe_slow); 3652 %} 3653 3654 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3655 effect(TEMP tmp); 3656 match(Set dst (ConvF2HF src)); 3657 
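  // ConvF2HF narrows the float in $src to an IEEE 754 binary16 (half-float) value
  // placed in the low 16 bits of the integer register $dst; at the Java level this
  // backs Float.floatToFloat16. A plausible sketch of the emitted sequence (the
  // actual code is in flt_to_flt16, used below):
  //   vcvtps2ph $tmp, $src, imm   // float -> half-float in an XMM lane
  //   movdl     $dst, $tmp        // move the 16-bit result into a GPR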
ins_cost(125); 3658 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3659 ins_encode %{ 3660 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3661 %} 3662 ins_pipe( pipe_slow ); 3663 %} 3664 3665 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3666 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3667 effect(TEMP ktmp, TEMP rtmp); 3668 match(Set mem (StoreC mem (ConvF2HF src))); 3669 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3670 ins_encode %{ 3671 __ movl($rtmp$$Register, 0x1); 3672 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3673 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3674 %} 3675 ins_pipe( pipe_slow ); 3676 %} 3677 3678 instruct vconvF2HF(vec dst, vec src) %{ 3679 match(Set dst (VectorCastF2HF src)); 3680 format %{ "vector_conv_F2HF $dst $src" %} 3681 ins_encode %{ 3682 int vlen_enc = vector_length_encoding(this, $src); 3683 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3684 %} 3685 ins_pipe( pipe_slow ); 3686 %} 3687 3688 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3689 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3690 format %{ "vcvtps2ph $mem,$src" %} 3691 ins_encode %{ 3692 int vlen_enc = vector_length_encoding(this, $src); 3693 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3694 %} 3695 ins_pipe( pipe_slow ); 3696 %} 3697 3698 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3699 match(Set dst (ConvHF2F src)); 3700 format %{ "vcvtph2ps $dst,$src" %} 3701 ins_encode %{ 3702 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3708 match(Set dst (VectorCastHF2F (LoadVector mem))); 3709 format %{ "vcvtph2ps $dst,$mem" %} 3710 ins_encode %{ 3711 int vlen_enc = vector_length_encoding(this); 3712 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3713 %} 3714 ins_pipe( pipe_slow ); 3715 %} 3716 3717 instruct vconvHF2F(vec dst, vec src) %{ 3718 match(Set dst (VectorCastHF2F src)); 3719 ins_cost(125); 3720 format %{ "vector_conv_HF2F $dst,$src" %} 3721 ins_encode %{ 3722 int vlen_enc = vector_length_encoding(this); 3723 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3724 %} 3725 ins_pipe( pipe_slow ); 3726 %} 3727 3728 // ---------------------------------------- VectorReinterpret ------------------------------------ 3729 instruct reinterpret_mask(kReg dst) %{ 3730 predicate(n->bottom_type()->isa_vectmask() && 3731 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3732 match(Set dst (VectorReinterpret dst)); 3733 ins_cost(125); 3734 format %{ "vector_reinterpret $dst\t!" %} 3735 ins_encode %{ 3736 // empty 3737 %} 3738 ins_pipe( pipe_slow ); 3739 %} 3740 3741 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3742 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3743 n->bottom_type()->isa_vectmask() && 3744 n->in(1)->bottom_type()->isa_vectmask() && 3745 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3746 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3747 match(Set dst (VectorReinterpret src)); 3748 effect(TEMP xtmp); 3749 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" 
%} 3750 ins_encode %{ 3751 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3752 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3753 assert(src_sz == dst_sz , "src and dst size mismatch"); 3754 int vlen_enc = vector_length_encoding(src_sz); 3755 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3756 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3757 %} 3758 ins_pipe( pipe_slow ); 3759 %} 3760 3761 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3762 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3763 n->bottom_type()->isa_vectmask() && 3764 n->in(1)->bottom_type()->isa_vectmask() && 3765 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3766 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3767 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3768 match(Set dst (VectorReinterpret src)); 3769 effect(TEMP xtmp); 3770 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3771 ins_encode %{ 3772 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3773 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3774 assert(src_sz == dst_sz , "src and dst size mismatch"); 3775 int vlen_enc = vector_length_encoding(src_sz); 3776 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3777 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3778 %} 3779 ins_pipe( pipe_slow ); 3780 %} 3781 3782 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3783 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3784 n->bottom_type()->isa_vectmask() && 3785 n->in(1)->bottom_type()->isa_vectmask() && 3786 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3787 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3788 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3789 match(Set dst (VectorReinterpret src)); 3790 effect(TEMP xtmp); 3791 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %} 3792 ins_encode %{ 3793 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3794 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3795 assert(src_sz == dst_sz , "src and dst size mismatch"); 3796 int vlen_enc = vector_length_encoding(src_sz); 3797 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3798 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3799 %} 3800 ins_pipe( pipe_slow ); 3801 %} 3802 3803 instruct reinterpret(vec dst) %{ 3804 predicate(!n->bottom_type()->isa_vectmask() && 3805 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3806 match(Set dst (VectorReinterpret dst)); 3807 ins_cost(125); 3808 format %{ "vector_reinterpret $dst\t!" 
%} 3809 ins_encode %{ 3810 // empty 3811 %} 3812 ins_pipe( pipe_slow ); 3813 %} 3814 3815 instruct reinterpret_expand(vec dst, vec src) %{ 3816 predicate(UseAVX == 0 && 3817 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3818 match(Set dst (VectorReinterpret src)); 3819 ins_cost(125); 3820 effect(TEMP dst); 3821 format %{ "vector_reinterpret_expand $dst,$src" %} 3822 ins_encode %{ 3823 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3824 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3825 3826 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3827 if (src_vlen_in_bytes == 4) { 3828 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3829 } else { 3830 assert(src_vlen_in_bytes == 8, ""); 3831 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3832 } 3833 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3834 %} 3835 ins_pipe( pipe_slow ); 3836 %} 3837 3838 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3839 predicate(UseAVX > 0 && 3840 !n->bottom_type()->isa_vectmask() && 3841 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3842 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3843 match(Set dst (VectorReinterpret src)); 3844 ins_cost(125); 3845 format %{ "vector_reinterpret_expand $dst,$src" %} 3846 ins_encode %{ 3847 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3848 %} 3849 ins_pipe( pipe_slow ); 3850 %} 3851 3852 3853 instruct vreinterpret_expand(legVec dst, vec src) %{ 3854 predicate(UseAVX > 0 && 3855 !n->bottom_type()->isa_vectmask() && 3856 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3857 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3858 match(Set dst (VectorReinterpret src)); 3859 ins_cost(125); 3860 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3861 ins_encode %{ 3862 switch (Matcher::vector_length_in_bytes(this, $src)) { 3863 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3864 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3865 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3866 default: ShouldNotReachHere(); 3867 } 3868 %} 3869 ins_pipe( pipe_slow ); 3870 %} 3871 3872 instruct reinterpret_shrink(vec dst, legVec src) %{ 3873 predicate(!n->bottom_type()->isa_vectmask() && 3874 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3875 match(Set dst (VectorReinterpret src)); 3876 ins_cost(125); 3877 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3878 ins_encode %{ 3879 switch (Matcher::vector_length_in_bytes(this)) { 3880 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3881 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3882 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3883 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3884 default: ShouldNotReachHere(); 3885 } 3886 %} 3887 ins_pipe( pipe_slow ); 3888 %} 3889 3890 // ---------------------------------------------------------------------------------------------------- 3891 3892 #ifdef _LP64 3893 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3894 match(Set dst (RoundDoubleMode src rmode)); 3895 format %{ "roundsd $dst,$src" %} 3896 ins_cost(150); 3897 ins_encode %{ 3898 assert(UseSSE >= 4, "required"); 3899 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3900 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3901 } 3902 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3903 %} 3904 ins_pipe(pipe_slow); 3905 %} 3906 3907 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3908 match(Set dst (RoundDoubleMode con rmode)); 3909 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3910 ins_cost(150); 3911 ins_encode %{ 3912 assert(UseSSE >= 4, "required"); 3913 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3914 %} 3915 ins_pipe(pipe_slow); 3916 %} 3917 3918 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3919 predicate(Matcher::vector_length(n) < 8); 3920 match(Set dst (RoundDoubleModeV src rmode)); 3921 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3922 ins_encode %{ 3923 assert(UseAVX > 0, "required"); 3924 int vlen_enc = vector_length_encoding(this); 3925 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3926 %} 3927 ins_pipe( pipe_slow ); 3928 %} 3929 3930 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3931 predicate(Matcher::vector_length(n) == 8); 3932 match(Set dst (RoundDoubleModeV src rmode)); 3933 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3934 ins_encode %{ 3935 assert(UseAVX > 2, "required"); 3936 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3937 %} 3938 ins_pipe( pipe_slow ); 3939 %} 3940 3941 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3942 predicate(Matcher::vector_length(n) < 8); 3943 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3944 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3945 ins_encode %{ 3946 assert(UseAVX > 0, "required"); 3947 int vlen_enc = vector_length_encoding(this); 3948 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3949 %} 3950 ins_pipe( pipe_slow ); 3951 %} 3952 3953 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3954 predicate(Matcher::vector_length(n) == 8); 3955 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3956 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3957 ins_encode %{ 3958 assert(UseAVX > 2, "required"); 3959 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3960 %} 3961 ins_pipe( pipe_slow ); 3962 %} 3963 #endif // _LP64 3964 3965 instruct onspinwait() %{ 3966 match(OnSpinWait); 3967 ins_cost(200); 3968 3969 format %{ 3970 $$template 3971 $$emit$$"pause\t! 
membar_onspinwait" 3972 %} 3973 ins_encode %{ 3974 __ pause(); 3975 %} 3976 ins_pipe(pipe_slow); 3977 %} 3978 3979 // a * b + c 3980 instruct fmaD_reg(regD a, regD b, regD c) %{ 3981 match(Set c (FmaD c (Binary a b))); 3982 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3983 ins_cost(150); 3984 ins_encode %{ 3985 assert(UseFMA, "Needs FMA instructions support."); 3986 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3987 %} 3988 ins_pipe( pipe_slow ); 3989 %} 3990 3991 // a * b + c 3992 instruct fmaF_reg(regF a, regF b, regF c) %{ 3993 match(Set c (FmaF c (Binary a b))); 3994 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3995 ins_cost(150); 3996 ins_encode %{ 3997 assert(UseFMA, "Needs FMA instructions support."); 3998 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3999 %} 4000 ins_pipe( pipe_slow ); 4001 %} 4002 4003 // ====================VECTOR INSTRUCTIONS===================================== 4004 4005 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4006 instruct MoveVec2Leg(legVec dst, vec src) %{ 4007 match(Set dst src); 4008 format %{ "" %} 4009 ins_encode %{ 4010 ShouldNotReachHere(); 4011 %} 4012 ins_pipe( fpu_reg_reg ); 4013 %} 4014 4015 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4016 match(Set dst src); 4017 format %{ "" %} 4018 ins_encode %{ 4019 ShouldNotReachHere(); 4020 %} 4021 ins_pipe( fpu_reg_reg ); 4022 %} 4023 4024 // ============================================================================ 4025 4026 // Load vectors generic operand pattern 4027 instruct loadV(vec dst, memory mem) %{ 4028 match(Set dst (LoadVector mem)); 4029 ins_cost(125); 4030 format %{ "load_vector $dst,$mem" %} 4031 ins_encode %{ 4032 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4033 %} 4034 ins_pipe( pipe_slow ); 4035 %} 4036 4037 // Store vectors generic operand pattern. 4038 instruct storeV(memory mem, vec src) %{ 4039 match(Set mem (StoreVector mem src)); 4040 ins_cost(145); 4041 format %{ "store_vector $mem,$src\n\t" %} 4042 ins_encode %{ 4043 switch (Matcher::vector_length_in_bytes(this, $src)) { 4044 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4045 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4046 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4047 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4048 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4049 default: ShouldNotReachHere(); 4050 } 4051 %} 4052 ins_pipe( pipe_slow ); 4053 %} 4054 4055 // ---------------------------------------- Gather ------------------------------------ 4056 4057 // Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE 4058 4059 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4060 predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) && 4061 Matcher::vector_length_in_bytes(n) <= 32); 4062 match(Set dst (LoadVectorGather mem idx)); 4063 effect(TEMP dst, TEMP tmp, TEMP mask); 4064 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4065 ins_encode %{ 4066 int vlen_enc = vector_length_encoding(this); 4067 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4068 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4069 __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4070 __ lea($tmp$$Register, $mem$$Address); 4071 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4072 %} 4073 ins_pipe( pipe_slow ); 4074 %} 4075 4076 4077 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4078 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4079 !is_subword_type(Matcher::vector_element_basic_type(n))); 4080 match(Set dst (LoadVectorGather mem idx)); 4081 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4082 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 4083 ins_encode %{ 4084 int vlen_enc = vector_length_encoding(this); 4085 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4086 __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister); 4087 __ lea($tmp$$Register, $mem$$Address); 4088 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4089 %} 4090 ins_pipe( pipe_slow ); 4091 %} 4092 4093 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4094 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) && 4095 !is_subword_type(Matcher::vector_element_basic_type(n))); 4096 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4097 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4098 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and ktmp as TEMP" %} 4099 ins_encode %{ 4100 assert(UseAVX > 2, "sanity"); 4101 int vlen_enc = vector_length_encoding(this); 4102 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4103 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4104 // Note: Since the gather instruction partially updates the opmask register used 4105 // for predication, the mask operand is moved to a temporary first. 4106 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4107 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4108 __ lea($tmp$$Register, $mem$$Address); 4109 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{ 4115 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4116 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4117 effect(TEMP tmp, TEMP rtmp); 4118 format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4119 ins_encode %{ 4120 int vlen_enc = vector_length_encoding(this); 4121 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4122 __ lea($tmp$$Register, $mem$$Address); 4123 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4124 %} 4125 ins_pipe( pipe_slow ); 4126 %} 4127 4128 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4129 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4130 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4131 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4132 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4133 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4134 ins_encode %{ 4135 int vlen_enc = vector_length_encoding(this); 4136 int vector_len = Matcher::vector_length(this); 4137 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4138 __ lea($tmp$$Register, $mem$$Address); 4139 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4140 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4141 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4142 %} 4143 ins_pipe( pipe_slow ); 4144 %} 4145 4146 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4147 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4148 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4149 effect(TEMP tmp, TEMP rtmp, KILL cr); 4150 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4151 ins_encode %{ 4152 int vlen_enc = vector_length_encoding(this); 4153 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4154 __ lea($tmp$$Register, $mem$$Address); 4155 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4156 %} 4157 ins_pipe( pipe_slow ); 4158 %} 4159 4160 4161 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4162 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4163 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4164 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4165 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4166 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4167 ins_encode %{ 4168 int vlen_enc = vector_length_encoding(this); 4169 int vector_len = Matcher::vector_length(this); 4170 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4171 __ lea($tmp$$Register, $mem$$Address); 4172 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4173 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4174 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 4180 #ifdef _LP64 4181 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4182 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4183 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4184 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4185 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4186 ins_encode %{ 4187 int vlen_enc = vector_length_encoding(this); 4188 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4189 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4190 __ lea($tmp$$Register, $mem$$Address); 4191 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4192 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4193 %} 4194 ins_pipe( pipe_slow ); 4195 %} 4196 4197 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4198 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4199 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4200 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4201 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4202 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4203 ins_encode %{ 4204 int vlen_enc = vector_length_encoding(this); 4205 int vector_len = Matcher::vector_length(this); 4206 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4207 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4208 __ lea($tmp$$Register, $mem$$Address); 4209 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4210 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4211 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4212 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4213 %} 4214 ins_pipe( pipe_slow ); 4215 %} 4216 4217 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4218 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4219 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4220 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4221 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4222 ins_encode %{ 4223 int vlen_enc = vector_length_encoding(this); 4224 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4225 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4226 __ lea($tmp$$Register, $mem$$Address); 4227 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4228 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4229 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4230 %} 4231 ins_pipe( pipe_slow ); 4232 %} 4233 4234 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4235 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4236 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4237 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4238 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4239 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4240 ins_encode %{ 4241 int vlen_enc = vector_length_encoding(this); 4242 int vector_len = Matcher::vector_length(this); 4243 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4244 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4245 __ lea($tmp$$Register, $mem$$Address); 4246 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4247 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4248 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4249 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4250 %} 4251 ins_pipe( pipe_slow ); 4252 %} 4253 4254 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4255 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4256 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4257 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4258 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4259 ins_encode %{ 4260 int vlen_enc = vector_length_encoding(this); 4261 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4262 __ lea($tmp$$Register, $mem$$Address); 4263 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4264 if (elem_bt == T_SHORT) { 4265 __ movl($mask_idx$$Register, 0x55555555); 4266 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4267 } 4268 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4269 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4270 %} 4271 ins_pipe( pipe_slow ); 4272 %} 4273 4274 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4275 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4276 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4277 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4278 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4279 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4280 ins_encode %{ 4281 int vlen_enc = vector_length_encoding(this); 4282 int vector_len = Matcher::vector_length(this); 4283 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4284 __ lea($tmp$$Register, $mem$$Address); 4285 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4286 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4287 if (elem_bt == T_SHORT) { 4288 __ movl($mask_idx$$Register, 0x55555555); 4289 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4290 } 4291 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4292 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4293 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4299 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4300 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4301 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4302 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4303 ins_encode %{ 4304 int vlen_enc = vector_length_encoding(this); 4305 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4306 __ lea($tmp$$Register, $mem$$Address); 4307 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4308 if (elem_bt == T_SHORT) { 4309 __ movl($mask_idx$$Register, 0x55555555); 4310 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4311 } 4312 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4313 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4314 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4315 %} 4316 ins_pipe( pipe_slow ); 4317 %} 4318 4319 instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4320 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4321 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4322 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4323 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4324 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4325 ins_encode %{ 4326 int vlen_enc = vector_length_encoding(this); 4327 int vector_len = Matcher::vector_length(this); 4328 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4329 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4330 __ lea($tmp$$Register, $mem$$Address); 4331 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4332 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4333 if (elem_bt == T_SHORT) { 4334 __ movl($mask_idx$$Register, 0x55555555); 4335 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4336 } 4337 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4338 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4339 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4340 %} 4341 ins_pipe( pipe_slow ); 4342 %} 4343 #endif 4344 4345 // ====================Scatter======================================= 4346 4347 // Scatter INT, LONG, FLOAT, DOUBLE 4348 4349 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4350 predicate(UseAVX > 2); 4351 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4352 effect(TEMP tmp, TEMP ktmp); 4353 format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %} 4354 ins_encode %{ 4355 int vlen_enc = vector_length_encoding(this, $src); 4356 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4357 4358 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4359 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4360 4361 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4362 __ lea($tmp$$Register, $mem$$Address); 4363 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4364 %} 4365 ins_pipe( pipe_slow ); 4366 %} 4367 4368 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4369 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4370 effect(TEMP tmp, TEMP ktmp); 4371 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4372 ins_encode %{ 4373 int vlen_enc = vector_length_encoding(this, $src); 4374 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4375 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4376 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4377 // Note: Since the scatter instruction partially updates the opmask register used 4378 // for predication, the mask operand is moved to a temporary first.
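    // Scalar-equivalent sketch of the masked scatter emitted below (illustrative
    // only; 'lanes' is the lane count implied by vlen_enc, the element type by elem_bt):
    //   for (int lane = 0; lane < lanes; lane++) {
    //     if (mask[lane]) {
    //       base[idx[lane]] = src[lane];
    //     }
    //   }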
4379 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4380 __ lea($tmp$$Register, $mem$$Address); 4381 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4382 %} 4383 ins_pipe( pipe_slow ); 4384 %} 4385 4386 // ====================REPLICATE======================================= 4387 4388 // Replicate byte scalar to be vector 4389 instruct vReplB_reg(vec dst, rRegI src) %{ 4390 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 4391 match(Set dst (Replicate src)); 4392 format %{ "replicateB $dst,$src" %} 4393 ins_encode %{ 4394 uint vlen = Matcher::vector_length(this); 4395 if (UseAVX >= 2) { 4396 int vlen_enc = vector_length_encoding(this); 4397 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4398 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4399 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4400 } else { 4401 __ movdl($dst$$XMMRegister, $src$$Register); 4402 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4403 } 4404 } else { 4405 assert(UseAVX < 2, ""); 4406 __ movdl($dst$$XMMRegister, $src$$Register); 4407 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4408 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4409 if (vlen >= 16) { 4410 assert(vlen == 16, ""); 4411 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4412 } 4413 } 4414 %} 4415 ins_pipe( pipe_slow ); 4416 %} 4417 4418 instruct ReplB_mem(vec dst, memory mem) %{ 4419 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE); 4420 match(Set dst (Replicate (LoadB mem))); 4421 format %{ "replicateB $dst,$mem" %} 4422 ins_encode %{ 4423 int vlen_enc = vector_length_encoding(this); 4424 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4425 %} 4426 ins_pipe( pipe_slow ); 4427 %} 4428 4429 // ====================ReplicateS======================================= 4430 4431 instruct vReplS_reg(vec dst, rRegI src) %{ 4432 predicate(Matcher::vector_element_basic_type(n) == T_SHORT); 4433 match(Set dst (Replicate src)); 4434 format %{ "replicateS $dst,$src" %} 4435 ins_encode %{ 4436 uint vlen = Matcher::vector_length(this); 4437 int vlen_enc = vector_length_encoding(this); 4438 if (UseAVX >= 2) { 4439 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4440 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4441 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4442 } else { 4443 __ movdl($dst$$XMMRegister, $src$$Register); 4444 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4445 } 4446 } else { 4447 assert(UseAVX < 2, ""); 4448 __ movdl($dst$$XMMRegister, $src$$Register); 4449 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4450 if (vlen >= 8) { 4451 assert(vlen == 8, ""); 4452 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4453 } 4454 } 4455 %} 4456 ins_pipe( pipe_slow ); 4457 %} 4458 4459 instruct ReplS_mem(vec dst, memory mem) %{ 4460 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4461 match(Set dst (Replicate (LoadS mem))); 4462 format %{ "replicateS $dst,$mem" %} 4463 ins_encode %{ 4464 int vlen_enc = vector_length_encoding(this); 4465 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4466 %} 4467 ins_pipe( pipe_slow ); 4468 %} 4469 4470 // ====================ReplicateI======================================= 4471 4472 instruct ReplI_reg(vec dst, rRegI 
src) %{ 4473 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4474 match(Set dst (Replicate src)); 4475 format %{ "replicateI $dst,$src" %} 4476 ins_encode %{ 4477 uint vlen = Matcher::vector_length(this); 4478 int vlen_enc = vector_length_encoding(this); 4479 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4480 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4481 } else if (VM_Version::supports_avx2()) { 4482 __ movdl($dst$$XMMRegister, $src$$Register); 4483 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4484 } else { 4485 __ movdl($dst$$XMMRegister, $src$$Register); 4486 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4487 } 4488 %} 4489 ins_pipe( pipe_slow ); 4490 %} 4491 4492 instruct ReplI_mem(vec dst, memory mem) %{ 4493 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4494 match(Set dst (Replicate (LoadI mem))); 4495 format %{ "replicateI $dst,$mem" %} 4496 ins_encode %{ 4497 int vlen_enc = vector_length_encoding(this); 4498 if (VM_Version::supports_avx2()) { 4499 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4500 } else if (VM_Version::supports_avx()) { 4501 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4502 } else { 4503 __ movdl($dst$$XMMRegister, $mem$$Address); 4504 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4505 } 4506 %} 4507 ins_pipe( pipe_slow ); 4508 %} 4509 4510 instruct ReplI_imm(vec dst, immI con) %{ 4511 predicate(Matcher::is_non_long_integral_vector(n)); 4512 match(Set dst (Replicate con)); 4513 format %{ "replicateI $dst,$con" %} 4514 ins_encode %{ 4515 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4516 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4517 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4518 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4519 BasicType bt = Matcher::vector_element_basic_type(this); 4520 int vlen = Matcher::vector_length_in_bytes(this); 4521 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4522 %} 4523 ins_pipe( pipe_slow ); 4524 %} 4525 4526 // Replicate scalar zero to be vector 4527 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4528 predicate(Matcher::is_non_long_integral_vector(n)); 4529 match(Set dst (Replicate zero)); 4530 format %{ "replicateI $dst,$zero" %} 4531 ins_encode %{ 4532 int vlen_enc = vector_length_encoding(this); 4533 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4534 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4535 } else { 4536 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4537 } 4538 %} 4539 ins_pipe( fpu_reg_reg ); 4540 %} 4541 4542 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4543 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4544 match(Set dst (Replicate con)); 4545 format %{ "vallones $dst" %} 4546 ins_encode %{ 4547 int vector_len = vector_length_encoding(this); 4548 __ vallones($dst$$XMMRegister, vector_len); 4549 %} 4550 ins_pipe( pipe_slow ); 4551 %} 4552 4553 // ====================ReplicateL======================================= 4554 4555 #ifdef _LP64 4556 // Replicate long (8 byte) scalar to be vector 4557 instruct ReplL_reg(vec dst, rRegL src) %{ 4558 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4559 match(Set dst (Replicate src)); 4560 format %{ "replicateL $dst,$src" %} 4561 ins_encode %{ 4562 int vlen = Matcher::vector_length(this); 4563 int vlen_enc = vector_length_encoding(this); 4564 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4565 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4566 } else if (VM_Version::supports_avx2()) { 4567 __ movdq($dst$$XMMRegister, $src$$Register); 4568 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4569 } else { 4570 __ movdq($dst$$XMMRegister, $src$$Register); 4571 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4572 } 4573 %} 4574 ins_pipe( pipe_slow ); 4575 %} 4576 #else // _LP64 4577 // Replicate long (8 byte) scalar to be vector 4578 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4579 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4580 match(Set dst (Replicate src)); 4581 effect(TEMP dst, USE src, TEMP tmp); 4582 format %{ "replicateL $dst,$src" %} 4583 ins_encode %{ 4584 uint vlen = Matcher::vector_length(this); 4585 if (vlen == 2) { 4586 __ movdl($dst$$XMMRegister, $src$$Register); 4587 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4588 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4589 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4590 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4591 int vlen_enc = Assembler::AVX_256bit; 4592 __ movdl($dst$$XMMRegister, $src$$Register); 4593 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4594 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4595 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4596 } else { 4597 __ movdl($dst$$XMMRegister, $src$$Register); 4598 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4599 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4600 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4601 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4602 
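// At this point the replicated long occupies both 64-bit lanes of the low 128 bits, and vinserti128_high has copied them into the upper half, filling all four lanes of the 256-bit destination.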
} 4603 %} 4604 ins_pipe( pipe_slow ); 4605 %} 4606 4607 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4608 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4609 match(Set dst (Replicate src)); 4610 effect(TEMP dst, USE src, TEMP tmp); 4611 format %{ "replicateL $dst,$src" %} 4612 ins_encode %{ 4613 if (VM_Version::supports_avx512vl()) { 4614 __ movdl($dst$$XMMRegister, $src$$Register); 4615 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4616 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4617 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4618 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4619 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4620 } else { 4621 int vlen_enc = Assembler::AVX_512bit; 4622 __ movdl($dst$$XMMRegister, $src$$Register); 4623 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4624 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4625 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4626 } 4627 %} 4628 ins_pipe( pipe_slow ); 4629 %} 4630 #endif // _LP64 4631 4632 instruct ReplL_mem(vec dst, memory mem) %{ 4633 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4634 match(Set dst (Replicate (LoadL mem))); 4635 format %{ "replicateL $dst,$mem" %} 4636 ins_encode %{ 4637 int vlen_enc = vector_length_encoding(this); 4638 if (VM_Version::supports_avx2()) { 4639 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4640 } else if (VM_Version::supports_sse3()) { 4641 __ movddup($dst$$XMMRegister, $mem$$Address); 4642 } else { 4643 __ movq($dst$$XMMRegister, $mem$$Address); 4644 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4645 } 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 4649 4650 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
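// Rough sketch of the intended expansion (the exact sequence is chosen by load_constant_vector from the vector width and CPU features; the instruction named here is only an illustration): the 8-byte immediate is emitted once into the constant table and then broadcast, e.g. vpbroadcastq dst, [constant_table_entry] on AVX2-capable hardware.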
4651 instruct ReplL_imm(vec dst, immL con) %{ 4652 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4653 match(Set dst (Replicate con)); 4654 format %{ "replicateL $dst,$con" %} 4655 ins_encode %{ 4656 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4657 int vlen = Matcher::vector_length_in_bytes(this); 4658 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4659 %} 4660 ins_pipe( pipe_slow ); 4661 %} 4662 4663 instruct ReplL_zero(vec dst, immL0 zero) %{ 4664 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4665 match(Set dst (Replicate zero)); 4666 format %{ "replicateL $dst,$zero" %} 4667 ins_encode %{ 4668 int vlen_enc = vector_length_encoding(this); 4669 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4670 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4671 } else { 4672 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4673 } 4674 %} 4675 ins_pipe( fpu_reg_reg ); 4676 %} 4677 4678 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4679 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4680 match(Set dst (Replicate con)); 4681 format %{ "vallones $dst" %} 4682 ins_encode %{ 4683 int vector_len = vector_length_encoding(this); 4684 __ vallones($dst$$XMMRegister, vector_len); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 // ====================ReplicateF======================================= 4690 4691 instruct vReplF_reg(vec dst, vlRegF src) %{ 4692 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4693 match(Set dst (Replicate src)); 4694 format %{ "replicateF $dst,$src" %} 4695 ins_encode %{ 4696 uint vlen = Matcher::vector_length(this); 4697 int vlen_enc = vector_length_encoding(this); 4698 if (vlen <= 4) { 4699 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4700 } else if (VM_Version::supports_avx2()) { 4701 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4702 } else { 4703 assert(vlen == 8, "sanity"); 4704 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4705 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4706 } 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct ReplF_reg(vec dst, vlRegF src) %{ 4712 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4713 match(Set dst (Replicate src)); 4714 format %{ "replicateF $dst,$src" %} 4715 ins_encode %{ 4716 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4717 %} 4718 ins_pipe( pipe_slow ); 4719 %} 4720 4721 instruct ReplF_mem(vec dst, memory mem) %{ 4722 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4723 match(Set dst (Replicate (LoadF mem))); 4724 format %{ "replicateF $dst,$mem" %} 4725 ins_encode %{ 4726 int vlen_enc = vector_length_encoding(this); 4727 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4728 %} 4729 ins_pipe( pipe_slow ); 4730 %} 4731 4732 // Replicate float scalar immediate to be vector by loading from const table. 4733 instruct ReplF_imm(vec dst, immF con) %{ 4734 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4735 match(Set dst (Replicate con)); 4736 format %{ "replicateF $dst,$con" %} 4737 ins_encode %{ 4738 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4739 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 2)); 4740 int vlen = Matcher::vector_length_in_bytes(this); 4741 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4742 %} 4743 ins_pipe( pipe_slow ); 4744 %} 4745 4746 instruct ReplF_zero(vec dst, immF0 zero) %{ 4747 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4748 match(Set dst (Replicate zero)); 4749 format %{ "replicateF $dst,$zero" %} 4750 ins_encode %{ 4751 int vlen_enc = vector_length_encoding(this); 4752 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4753 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4754 } else { 4755 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4756 } 4757 %} 4758 ins_pipe( fpu_reg_reg ); 4759 %} 4760 4761 // ====================ReplicateD======================================= 4762 4763 // Replicate double (8 bytes) scalar to be vector 4764 instruct vReplD_reg(vec dst, vlRegD src) %{ 4765 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4766 match(Set dst (Replicate src)); 4767 format %{ "replicateD $dst,$src" %} 4768 ins_encode %{ 4769 uint vlen = Matcher::vector_length(this); 4770 int vlen_enc = vector_length_encoding(this); 4771 if (vlen <= 2) { 4772 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4773 } else if (VM_Version::supports_avx2()) { 4774 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4775 } else { 4776 assert(vlen == 4, "sanity"); 4777 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4778 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4779 } 4780 %} 4781 ins_pipe( pipe_slow ); 4782 %} 4783 4784 instruct ReplD_reg(vec dst, vlRegD src) %{ 4785 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4786 match(Set dst (Replicate src)); 4787 format %{ "replicateD $dst,$src" %} 4788 ins_encode %{ 4789 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4790 %} 4791 ins_pipe( pipe_slow ); 4792 %} 4793 4794 instruct ReplD_mem(vec dst, memory mem) %{ 4795 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4796 match(Set dst (Replicate (LoadD mem))); 4797 format %{ "replicateD $dst,$mem" %} 4798 ins_encode %{ 4799 if (Matcher::vector_length(this) >= 4) { 4800 int vlen_enc = vector_length_encoding(this); 4801 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4802 } else { 4803 __ movddup($dst$$XMMRegister, $mem$$Address); 4804 } 4805 %} 4806 ins_pipe( pipe_slow ); 4807 %} 4808 4809 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
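// Same constant-table scheme as ReplF_imm/ReplL_imm above: the double's bit pattern is stored once and load_constant_vector widens it to the destination vector size.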
4810 instruct ReplD_imm(vec dst, immD con) %{ 4811 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4812 match(Set dst (Replicate con)); 4813 format %{ "replicateD $dst,$con" %} 4814 ins_encode %{ 4815 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4816 int vlen = Matcher::vector_length_in_bytes(this); 4817 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4818 %} 4819 ins_pipe( pipe_slow ); 4820 %} 4821 4822 instruct ReplD_zero(vec dst, immD0 zero) %{ 4823 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 4824 match(Set dst (Replicate zero)); 4825 format %{ "replicateD $dst,$zero" %} 4826 ins_encode %{ 4827 int vlen_enc = vector_length_encoding(this); 4828 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4829 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4830 } else { 4831 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4832 } 4833 %} 4834 ins_pipe( fpu_reg_reg ); 4835 %} 4836 4837 // ====================VECTOR INSERT======================================= 4838 4839 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4840 predicate(Matcher::vector_length_in_bytes(n) < 32); 4841 match(Set dst (VectorInsert (Binary dst val) idx)); 4842 format %{ "vector_insert $dst,$val,$idx" %} 4843 ins_encode %{ 4844 assert(UseSSE >= 4, "required"); 4845 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4846 4847 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4848 4849 assert(is_integral_type(elem_bt), ""); 4850 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4851 4852 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4858 predicate(Matcher::vector_length_in_bytes(n) == 32); 4859 match(Set dst (VectorInsert (Binary src val) idx)); 4860 effect(TEMP vtmp); 4861 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4862 ins_encode %{ 4863 int vlen_enc = Assembler::AVX_256bit; 4864 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4865 int elem_per_lane = 16/type2aelembytes(elem_bt); 4866 int log2epr = log2(elem_per_lane); 4867 4868 assert(is_integral_type(elem_bt), "sanity"); 4869 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4870 4871 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4872 uint y_idx = ($idx$$constant >> log2epr) & 1; 4873 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4874 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4875 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4876 %} 4877 ins_pipe( pipe_slow ); 4878 %} 4879 4880 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4881 predicate(Matcher::vector_length_in_bytes(n) == 64); 4882 match(Set dst (VectorInsert (Binary src val) idx)); 4883 effect(TEMP vtmp); 4884 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4885 ins_encode %{ 4886 assert(UseAVX > 2, "sanity"); 4887 4888 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4889 int elem_per_lane = 16/type2aelembytes(elem_bt); 4890 int log2epr = log2(elem_per_lane); 4891 4892 assert(is_integral_type(elem_bt), ""); 4893 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4894 4895 uint x_idx = $idx$$constant & 
right_n_bits(log2epr); 4896 uint y_idx = ($idx$$constant >> log2epr) & 3; 4897 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4898 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4899 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4900 %} 4901 ins_pipe( pipe_slow ); 4902 %} 4903 4904 #ifdef _LP64 4905 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4906 predicate(Matcher::vector_length(n) == 2); 4907 match(Set dst (VectorInsert (Binary dst val) idx)); 4908 format %{ "vector_insert $dst,$val,$idx" %} 4909 ins_encode %{ 4910 assert(UseSSE >= 4, "required"); 4911 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4912 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4913 4914 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4915 %} 4916 ins_pipe( pipe_slow ); 4917 %} 4918 4919 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4920 predicate(Matcher::vector_length(n) == 4); 4921 match(Set dst (VectorInsert (Binary src val) idx)); 4922 effect(TEMP vtmp); 4923 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4924 ins_encode %{ 4925 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4926 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4927 4928 uint x_idx = $idx$$constant & right_n_bits(1); 4929 uint y_idx = ($idx$$constant >> 1) & 1; 4930 int vlen_enc = Assembler::AVX_256bit; 4931 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4932 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4933 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4934 %} 4935 ins_pipe( pipe_slow ); 4936 %} 4937 4938 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4939 predicate(Matcher::vector_length(n) == 8); 4940 match(Set dst (VectorInsert (Binary src val) idx)); 4941 effect(TEMP vtmp); 4942 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4943 ins_encode %{ 4944 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4945 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4946 4947 uint x_idx = $idx$$constant & right_n_bits(1); 4948 uint y_idx = ($idx$$constant >> 1) & 3; 4949 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4950 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4951 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4952 %} 4953 ins_pipe( pipe_slow ); 4954 %} 4955 #endif 4956 4957 instruct insertF(vec dst, regF val, immU8 idx) %{ 4958 predicate(Matcher::vector_length(n) < 8); 4959 match(Set dst (VectorInsert (Binary dst val) idx)); 4960 format %{ "vector_insert $dst,$val,$idx" %} 4961 ins_encode %{ 4962 assert(UseSSE >= 4, "sanity"); 4963 4964 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4965 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4966 4967 uint x_idx = $idx$$constant & right_n_bits(2); 4968 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4969 %} 4970 ins_pipe( pipe_slow ); 4971 %} 4972 4973 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4974 predicate(Matcher::vector_length(n) >= 8); 4975 match(Set dst (VectorInsert (Binary src val) idx)); 4976 effect(TEMP vtmp); 4977 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4978 
ins_encode %{ 4979 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4980 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4981 4982 int vlen = Matcher::vector_length(this); 4983 uint x_idx = $idx$$constant & right_n_bits(2); 4984 if (vlen == 8) { 4985 uint y_idx = ($idx$$constant >> 2) & 1; 4986 int vlen_enc = Assembler::AVX_256bit; 4987 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4988 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4989 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4990 } else { 4991 assert(vlen == 16, "sanity"); 4992 uint y_idx = ($idx$$constant >> 2) & 3; 4993 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4994 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4995 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4996 } 4997 %} 4998 ins_pipe( pipe_slow ); 4999 %} 5000 5001 #ifdef _LP64 5002 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 5003 predicate(Matcher::vector_length(n) == 2); 5004 match(Set dst (VectorInsert (Binary dst val) idx)); 5005 effect(TEMP tmp); 5006 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 5007 ins_encode %{ 5008 assert(UseSSE >= 4, "sanity"); 5009 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5010 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5011 5012 __ movq($tmp$$Register, $val$$XMMRegister); 5013 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 5014 %} 5015 ins_pipe( pipe_slow ); 5016 %} 5017 5018 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 5019 predicate(Matcher::vector_length(n) == 4); 5020 match(Set dst (VectorInsert (Binary src val) idx)); 5021 effect(TEMP vtmp, TEMP tmp); 5022 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 5023 ins_encode %{ 5024 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5025 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5026 5027 uint x_idx = $idx$$constant & right_n_bits(1); 5028 uint y_idx = ($idx$$constant >> 1) & 1; 5029 int vlen_enc = Assembler::AVX_256bit; 5030 __ movq($tmp$$Register, $val$$XMMRegister); 5031 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5032 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5033 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5034 %} 5035 ins_pipe( pipe_slow ); 5036 %} 5037 5038 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 5039 predicate(Matcher::vector_length(n) == 8); 5040 match(Set dst (VectorInsert (Binary src val) idx)); 5041 effect(TEMP tmp, TEMP vtmp); 5042 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 5043 ins_encode %{ 5044 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 5045 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 5046 5047 uint x_idx = $idx$$constant & right_n_bits(1); 5048 uint y_idx = ($idx$$constant >> 1) & 3; 5049 __ movq($tmp$$Register, $val$$XMMRegister); 5050 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 5051 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 5052 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 5053 %} 5054 ins_pipe( pipe_slow 
); 5055 %} 5056 #endif 5057 5058 // ====================REDUCTION ARITHMETIC======================================= 5059 5060 // =======================Int Reduction========================================== 5061 5062 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5063 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 5064 match(Set dst (AddReductionVI src1 src2)); 5065 match(Set dst (MulReductionVI src1 src2)); 5066 match(Set dst (AndReductionV src1 src2)); 5067 match(Set dst ( OrReductionV src1 src2)); 5068 match(Set dst (XorReductionV src1 src2)); 5069 match(Set dst (MinReductionV src1 src2)); 5070 match(Set dst (MaxReductionV src1 src2)); 5071 effect(TEMP vtmp1, TEMP vtmp2); 5072 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5073 ins_encode %{ 5074 int opcode = this->ideal_Opcode(); 5075 int vlen = Matcher::vector_length(this, $src2); 5076 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5077 %} 5078 ins_pipe( pipe_slow ); 5079 %} 5080 5081 // =======================Long Reduction========================================== 5082 5083 #ifdef _LP64 5084 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5085 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 5086 match(Set dst (AddReductionVL src1 src2)); 5087 match(Set dst (MulReductionVL src1 src2)); 5088 match(Set dst (AndReductionV src1 src2)); 5089 match(Set dst ( OrReductionV src1 src2)); 5090 match(Set dst (XorReductionV src1 src2)); 5091 match(Set dst (MinReductionV src1 src2)); 5092 match(Set dst (MaxReductionV src1 src2)); 5093 effect(TEMP vtmp1, TEMP vtmp2); 5094 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5095 ins_encode %{ 5096 int opcode = this->ideal_Opcode(); 5097 int vlen = Matcher::vector_length(this, $src2); 5098 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5099 %} 5100 ins_pipe( pipe_slow ); 5101 %} 5102 5103 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 5104 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 5105 match(Set dst (AddReductionVL src1 src2)); 5106 match(Set dst (MulReductionVL src1 src2)); 5107 match(Set dst (AndReductionV src1 src2)); 5108 match(Set dst ( OrReductionV src1 src2)); 5109 match(Set dst (XorReductionV src1 src2)); 5110 match(Set dst (MinReductionV src1 src2)); 5111 match(Set dst (MaxReductionV src1 src2)); 5112 effect(TEMP vtmp1, TEMP vtmp2); 5113 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5114 ins_encode %{ 5115 int opcode = this->ideal_Opcode(); 5116 int vlen = Matcher::vector_length(this, $src2); 5117 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5118 %} 5119 ins_pipe( pipe_slow ); 5120 %} 5121 #endif // _LP64 5122 5123 // =======================Float Reduction========================================== 5124 5125 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 5126 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src 5127 match(Set dst (AddReductionVF dst src)); 5128 match(Set dst (MulReductionVF dst src)); 5129 effect(TEMP dst, TEMP vtmp); 5130 format %{ 
"vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 5131 ins_encode %{ 5132 int opcode = this->ideal_Opcode(); 5133 int vlen = Matcher::vector_length(this, $src); 5134 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5135 %} 5136 ins_pipe( pipe_slow ); 5137 %} 5138 5139 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 5140 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5141 match(Set dst (AddReductionVF dst src)); 5142 match(Set dst (MulReductionVF dst src)); 5143 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5144 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5145 ins_encode %{ 5146 int opcode = this->ideal_Opcode(); 5147 int vlen = Matcher::vector_length(this, $src); 5148 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5149 %} 5150 ins_pipe( pipe_slow ); 5151 %} 5152 5153 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5154 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src 5155 match(Set dst (AddReductionVF dst src)); 5156 match(Set dst (MulReductionVF dst src)); 5157 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5158 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5159 ins_encode %{ 5160 int opcode = this->ideal_Opcode(); 5161 int vlen = Matcher::vector_length(this, $src); 5162 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5163 %} 5164 ins_pipe( pipe_slow ); 5165 %} 5166 5167 5168 instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{ 5169 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5170 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5171 // src1 contains reduction identity 5172 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5173 match(Set dst (AddReductionVF src1 src2)); 5174 match(Set dst (MulReductionVF src1 src2)); 5175 effect(TEMP dst); 5176 format %{ "vector_reduction_float $dst,$src1,$src2 ;" %} 5177 ins_encode %{ 5178 int opcode = this->ideal_Opcode(); 5179 int vlen = Matcher::vector_length(this, $src2); 5180 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5181 %} 5182 ins_pipe( pipe_slow ); 5183 %} 5184 5185 instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{ 5186 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5187 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5188 // src1 contains reduction identity 5189 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5190 match(Set dst (AddReductionVF src1 src2)); 5191 match(Set dst (MulReductionVF src1 src2)); 5192 effect(TEMP dst, TEMP vtmp); 5193 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5194 ins_encode %{ 5195 int opcode = this->ideal_Opcode(); 5196 int vlen = Matcher::vector_length(this, $src2); 5197 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5198 %} 5199 ins_pipe( pipe_slow ); 5200 %} 5201 5202 instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{ 5203 // Non-strictly ordered floating-point add/mul reduction for floats. 
This rule is 5204 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5205 // src1 contains reduction identity 5206 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5207 match(Set dst (AddReductionVF src1 src2)); 5208 match(Set dst (MulReductionVF src1 src2)); 5209 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5210 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5211 ins_encode %{ 5212 int opcode = this->ideal_Opcode(); 5213 int vlen = Matcher::vector_length(this, $src2); 5214 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5215 %} 5216 ins_pipe( pipe_slow ); 5217 %} 5218 5219 instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5220 // Non-strictly ordered floating-point add/mul reduction for floats. This rule is 5221 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5222 // src1 contains reduction identity 5223 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2 5224 match(Set dst (AddReductionVF src1 src2)); 5225 match(Set dst (MulReductionVF src1 src2)); 5226 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5227 format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5228 ins_encode %{ 5229 int opcode = this->ideal_Opcode(); 5230 int vlen = Matcher::vector_length(this, $src2); 5231 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 5236 // =======================Double Reduction========================================== 5237 5238 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5239 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src 5240 match(Set dst (AddReductionVD dst src)); 5241 match(Set dst (MulReductionVD dst src)); 5242 effect(TEMP dst, TEMP vtmp); 5243 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5244 ins_encode %{ 5245 int opcode = this->ideal_Opcode(); 5246 int vlen = Matcher::vector_length(this, $src); 5247 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5248 %} 5249 ins_pipe( pipe_slow ); 5250 %} 5251 5252 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5253 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src 5254 match(Set dst (AddReductionVD dst src)); 5255 match(Set dst (MulReductionVD dst src)); 5256 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5257 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5258 ins_encode %{ 5259 int opcode = this->ideal_Opcode(); 5260 int vlen = Matcher::vector_length(this, $src); 5261 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5267 predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src 5268 match(Set dst (AddReductionVD dst src)); 5269 match(Set dst (MulReductionVD dst src)); 5270 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5271 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5272 ins_encode %{ 5273 
int opcode = this->ideal_Opcode(); 5274 int vlen = Matcher::vector_length(this, $src); 5275 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5276 %} 5277 ins_pipe( pipe_slow ); 5278 %} 5279 5280 instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{ 5281 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5282 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5283 // src1 contains reduction identity 5284 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2 5285 match(Set dst (AddReductionVD src1 src2)); 5286 match(Set dst (MulReductionVD src1 src2)); 5287 effect(TEMP dst); 5288 format %{ "vector_reduction_double $dst,$src1,$src2 ;" %} 5289 ins_encode %{ 5290 int opcode = this->ideal_Opcode(); 5291 int vlen = Matcher::vector_length(this, $src2); 5292 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister); 5293 %} 5294 ins_pipe( pipe_slow ); 5295 %} 5296 5297 instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{ 5298 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5299 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 5300 // src1 contains reduction identity 5301 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2 5302 match(Set dst (AddReductionVD src1 src2)); 5303 match(Set dst (MulReductionVD src1 src2)); 5304 effect(TEMP dst, TEMP vtmp); 5305 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %} 5306 ins_encode %{ 5307 int opcode = this->ideal_Opcode(); 5308 int vlen = Matcher::vector_length(this, $src2); 5309 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 5310 %} 5311 ins_pipe( pipe_slow ); 5312 %} 5313 5314 instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5315 // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is 5316 // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction). 
5317 // src1 contains reduction identity 5318 predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2 5319 match(Set dst (AddReductionVD src1 src2)); 5320 match(Set dst (MulReductionVD src1 src2)); 5321 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5322 format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5323 ins_encode %{ 5324 int opcode = this->ideal_Opcode(); 5325 int vlen = Matcher::vector_length(this, $src2); 5326 __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5327 %} 5328 ins_pipe( pipe_slow ); 5329 %} 5330 5331 // =======================Byte Reduction========================================== 5332 5333 #ifdef _LP64 5334 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5335 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5336 match(Set dst (AddReductionVI src1 src2)); 5337 match(Set dst (AndReductionV src1 src2)); 5338 match(Set dst ( OrReductionV src1 src2)); 5339 match(Set dst (XorReductionV src1 src2)); 5340 match(Set dst (MinReductionV src1 src2)); 5341 match(Set dst (MaxReductionV src1 src2)); 5342 effect(TEMP vtmp1, TEMP vtmp2); 5343 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5344 ins_encode %{ 5345 int opcode = this->ideal_Opcode(); 5346 int vlen = Matcher::vector_length(this, $src2); 5347 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5348 %} 5349 ins_pipe( pipe_slow ); 5350 %} 5351 5352 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5353 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5354 match(Set dst (AddReductionVI src1 src2)); 5355 match(Set dst (AndReductionV src1 src2)); 5356 match(Set dst ( OrReductionV src1 src2)); 5357 match(Set dst (XorReductionV src1 src2)); 5358 match(Set dst (MinReductionV src1 src2)); 5359 match(Set dst (MaxReductionV src1 src2)); 5360 effect(TEMP vtmp1, TEMP vtmp2); 5361 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5362 ins_encode %{ 5363 int opcode = this->ideal_Opcode(); 5364 int vlen = Matcher::vector_length(this, $src2); 5365 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5366 %} 5367 ins_pipe( pipe_slow ); 5368 %} 5369 #endif 5370 5371 // =======================Short Reduction========================================== 5372 5373 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5374 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5375 match(Set dst (AddReductionVI src1 src2)); 5376 match(Set dst (MulReductionVI src1 src2)); 5377 match(Set dst (AndReductionV src1 src2)); 5378 match(Set dst ( OrReductionV src1 src2)); 5379 match(Set dst (XorReductionV src1 src2)); 5380 match(Set dst (MinReductionV src1 src2)); 5381 match(Set dst (MaxReductionV src1 src2)); 5382 effect(TEMP vtmp1, TEMP vtmp2); 5383 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5384 ins_encode %{ 5385 int opcode = this->ideal_Opcode(); 5386 int vlen = Matcher::vector_length(this, $src2); 5387 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 
5388 %} 5389 ins_pipe( pipe_slow ); 5390 %} 5391 5392 // =======================Mul Reduction========================================== 5393 5394 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5395 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5396 Matcher::vector_length(n->in(2)) <= 32); // src2 5397 match(Set dst (MulReductionVI src1 src2)); 5398 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5399 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5400 ins_encode %{ 5401 int opcode = this->ideal_Opcode(); 5402 int vlen = Matcher::vector_length(this, $src2); 5403 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5404 %} 5405 ins_pipe( pipe_slow ); 5406 %} 5407 5408 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5409 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5410 Matcher::vector_length(n->in(2)) == 64); // src2 5411 match(Set dst (MulReductionVI src1 src2)); 5412 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5413 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5414 ins_encode %{ 5415 int opcode = this->ideal_Opcode(); 5416 int vlen = Matcher::vector_length(this, $src2); 5417 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5418 %} 5419 ins_pipe( pipe_slow ); 5420 %} 5421 5422 //--------------------Min/Max Float Reduction -------------------- 5423 // Float Min Reduction 5424 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5425 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5426 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5427 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5428 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5429 Matcher::vector_length(n->in(2)) == 2); 5430 match(Set dst (MinReductionV src1 src2)); 5431 match(Set dst (MaxReductionV src1 src2)); 5432 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5433 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5434 ins_encode %{ 5435 assert(UseAVX > 0, "sanity"); 5436 5437 int opcode = this->ideal_Opcode(); 5438 int vlen = Matcher::vector_length(this, $src2); 5439 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5440 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5441 %} 5442 ins_pipe( pipe_slow ); 5443 %} 5444 5445 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5446 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5447 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5448 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5449 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5450 Matcher::vector_length(n->in(2)) >= 4); 5451 match(Set dst (MinReductionV src1 src2)); 5452 match(Set dst (MaxReductionV src1 src2)); 5453 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5454 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5455 ins_encode %{ 5456 assert(UseAVX > 0, "sanity"); 5457 5458 int opcode = 
this->ideal_Opcode(); 5459 int vlen = Matcher::vector_length(this, $src2); 5460 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5461 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5462 %} 5463 ins_pipe( pipe_slow ); 5464 %} 5465 5466 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5467 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5468 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5469 Matcher::vector_length(n->in(2)) == 2); 5470 match(Set dst (MinReductionV dst src)); 5471 match(Set dst (MaxReductionV dst src)); 5472 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5473 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5474 ins_encode %{ 5475 assert(UseAVX > 0, "sanity"); 5476 5477 int opcode = this->ideal_Opcode(); 5478 int vlen = Matcher::vector_length(this, $src); 5479 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5480 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5481 %} 5482 ins_pipe( pipe_slow ); 5483 %} 5484 5485 5486 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5487 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5488 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5489 Matcher::vector_length(n->in(2)) >= 4); 5490 match(Set dst (MinReductionV dst src)); 5491 match(Set dst (MaxReductionV dst src)); 5492 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5493 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5494 ins_encode %{ 5495 assert(UseAVX > 0, "sanity"); 5496 5497 int opcode = this->ideal_Opcode(); 5498 int vlen = Matcher::vector_length(this, $src); 5499 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5500 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5501 %} 5502 ins_pipe( pipe_slow ); 5503 %} 5504 5505 5506 //--------------------Min Double Reduction -------------------- 5507 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5508 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5509 rFlagsReg cr) %{ 5510 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5511 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5512 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5513 Matcher::vector_length(n->in(2)) == 2); 5514 match(Set dst (MinReductionV src1 src2)); 5515 match(Set dst (MaxReductionV src1 src2)); 5516 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5517 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5518 ins_encode %{ 5519 assert(UseAVX > 0, "sanity"); 5520 5521 int opcode = this->ideal_Opcode(); 5522 int vlen = Matcher::vector_length(this, $src2); 5523 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5524 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5525 %} 5526 ins_pipe( pipe_slow ); 5527 %} 5528 5529 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5530 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5531 rFlagsReg cr) %{ 5532 
predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5533 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5534 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5535 Matcher::vector_length(n->in(2)) >= 4); 5536 match(Set dst (MinReductionV src1 src2)); 5537 match(Set dst (MaxReductionV src1 src2)); 5538 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5539 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5540 ins_encode %{ 5541 assert(UseAVX > 0, "sanity"); 5542 5543 int opcode = this->ideal_Opcode(); 5544 int vlen = Matcher::vector_length(this, $src2); 5545 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5546 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5547 %} 5548 ins_pipe( pipe_slow ); 5549 %} 5550 5551 5552 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5553 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5554 rFlagsReg cr) %{ 5555 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5556 Matcher::vector_length(n->in(2)) == 2); 5557 match(Set dst (MinReductionV dst src)); 5558 match(Set dst (MaxReductionV dst src)); 5559 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5560 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5561 ins_encode %{ 5562 assert(UseAVX > 0, "sanity"); 5563 5564 int opcode = this->ideal_Opcode(); 5565 int vlen = Matcher::vector_length(this, $src); 5566 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5567 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5568 %} 5569 ins_pipe( pipe_slow ); 5570 %} 5571 5572 instruct minmax_reductionD_av(legRegD dst, legVec src, 5573 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5574 rFlagsReg cr) %{ 5575 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5576 Matcher::vector_length(n->in(2)) >= 4); 5577 match(Set dst (MinReductionV dst src)); 5578 match(Set dst (MaxReductionV dst src)); 5579 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5580 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5581 ins_encode %{ 5582 assert(UseAVX > 0, "sanity"); 5583 5584 int opcode = this->ideal_Opcode(); 5585 int vlen = Matcher::vector_length(this, $src); 5586 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5587 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5588 %} 5589 ins_pipe( pipe_slow ); 5590 %} 5591 5592 // ====================VECTOR ARITHMETIC======================================= 5593 5594 // --------------------------------- ADD -------------------------------------- 5595 5596 // Bytes vector add 5597 instruct vaddB(vec dst, vec src) %{ 5598 predicate(UseAVX == 0); 5599 match(Set dst (AddVB dst src)); 5600 format %{ "paddb $dst,$src\t! add packedB" %} 5601 ins_encode %{ 5602 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5603 %} 5604 ins_pipe( pipe_slow ); 5605 %} 5606 5607 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5608 predicate(UseAVX > 0); 5609 match(Set dst (AddVB src1 src2)); 5610 format %{ "vpaddb $dst,$src1,$src2\t! 
add packedB" %} 5611 ins_encode %{ 5612 int vlen_enc = vector_length_encoding(this); 5613 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5614 %} 5615 ins_pipe( pipe_slow ); 5616 %} 5617 5618 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5619 predicate((UseAVX > 0) && 5620 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5621 match(Set dst (AddVB src (LoadVector mem))); 5622 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5623 ins_encode %{ 5624 int vlen_enc = vector_length_encoding(this); 5625 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5626 %} 5627 ins_pipe( pipe_slow ); 5628 %} 5629 5630 // Shorts/Chars vector add 5631 instruct vaddS(vec dst, vec src) %{ 5632 predicate(UseAVX == 0); 5633 match(Set dst (AddVS dst src)); 5634 format %{ "paddw $dst,$src\t! add packedS" %} 5635 ins_encode %{ 5636 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5637 %} 5638 ins_pipe( pipe_slow ); 5639 %} 5640 5641 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5642 predicate(UseAVX > 0); 5643 match(Set dst (AddVS src1 src2)); 5644 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5645 ins_encode %{ 5646 int vlen_enc = vector_length_encoding(this); 5647 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5648 %} 5649 ins_pipe( pipe_slow ); 5650 %} 5651 5652 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5653 predicate((UseAVX > 0) && 5654 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5655 match(Set dst (AddVS src (LoadVector mem))); 5656 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5657 ins_encode %{ 5658 int vlen_enc = vector_length_encoding(this); 5659 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5660 %} 5661 ins_pipe( pipe_slow ); 5662 %} 5663 5664 // Integers vector add 5665 instruct vaddI(vec dst, vec src) %{ 5666 predicate(UseAVX == 0); 5667 match(Set dst (AddVI dst src)); 5668 format %{ "paddd $dst,$src\t! add packedI" %} 5669 ins_encode %{ 5670 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5671 %} 5672 ins_pipe( pipe_slow ); 5673 %} 5674 5675 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5676 predicate(UseAVX > 0); 5677 match(Set dst (AddVI src1 src2)); 5678 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5679 ins_encode %{ 5680 int vlen_enc = vector_length_encoding(this); 5681 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5682 %} 5683 ins_pipe( pipe_slow ); 5684 %} 5685 5686 5687 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5688 predicate((UseAVX > 0) && 5689 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5690 match(Set dst (AddVI src (LoadVector mem))); 5691 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5692 ins_encode %{ 5693 int vlen_enc = vector_length_encoding(this); 5694 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5695 %} 5696 ins_pipe( pipe_slow ); 5697 %} 5698 5699 // Longs vector add 5700 instruct vaddL(vec dst, vec src) %{ 5701 predicate(UseAVX == 0); 5702 match(Set dst (AddVL dst src)); 5703 format %{ "paddq $dst,$src\t! add packedL" %} 5704 ins_encode %{ 5705 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5706 %} 5707 ins_pipe( pipe_slow ); 5708 %} 5709 5710 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5711 predicate(UseAVX > 0); 5712 match(Set dst (AddVL src1 src2)); 5713 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5714 ins_encode %{ 5715 int vlen_enc = vector_length_encoding(this); 5716 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5717 %} 5718 ins_pipe( pipe_slow ); 5719 %} 5720 5721 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5722 predicate((UseAVX > 0) && 5723 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5724 match(Set dst (AddVL src (LoadVector mem))); 5725 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5726 ins_encode %{ 5727 int vlen_enc = vector_length_encoding(this); 5728 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5729 %} 5730 ins_pipe( pipe_slow ); 5731 %} 5732 5733 // Floats vector add 5734 instruct vaddF(vec dst, vec src) %{ 5735 predicate(UseAVX == 0); 5736 match(Set dst (AddVF dst src)); 5737 format %{ "addps $dst,$src\t! add packedF" %} 5738 ins_encode %{ 5739 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5740 %} 5741 ins_pipe( pipe_slow ); 5742 %} 5743 5744 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5745 predicate(UseAVX > 0); 5746 match(Set dst (AddVF src1 src2)); 5747 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5748 ins_encode %{ 5749 int vlen_enc = vector_length_encoding(this); 5750 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5751 %} 5752 ins_pipe( pipe_slow ); 5753 %} 5754 5755 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5756 predicate((UseAVX > 0) && 5757 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5758 match(Set dst (AddVF src (LoadVector mem))); 5759 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5760 ins_encode %{ 5761 int vlen_enc = vector_length_encoding(this); 5762 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5763 %} 5764 ins_pipe( pipe_slow ); 5765 %} 5766 5767 // Doubles vector add 5768 instruct vaddD(vec dst, vec src) %{ 5769 predicate(UseAVX == 0); 5770 match(Set dst (AddVD dst src)); 5771 format %{ "addpd $dst,$src\t! add packedD" %} 5772 ins_encode %{ 5773 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5774 %} 5775 ins_pipe( pipe_slow ); 5776 %} 5777 5778 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5779 predicate(UseAVX > 0); 5780 match(Set dst (AddVD src1 src2)); 5781 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5782 ins_encode %{ 5783 int vlen_enc = vector_length_encoding(this); 5784 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5785 %} 5786 ins_pipe( pipe_slow ); 5787 %} 5788 5789 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5790 predicate((UseAVX > 0) && 5791 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5792 match(Set dst (AddVD src (LoadVector mem))); 5793 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5794 ins_encode %{ 5795 int vlen_enc = vector_length_encoding(this); 5796 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5797 %} 5798 ins_pipe( pipe_slow ); 5799 %} 5800 5801 // --------------------------------- SUB -------------------------------------- 5802 5803 // Bytes vector sub 5804 instruct vsubB(vec dst, vec src) %{ 5805 predicate(UseAVX == 0); 5806 match(Set dst (SubVB dst src)); 5807 format %{ "psubb $dst,$src\t! sub packedB" %} 5808 ins_encode %{ 5809 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5810 %} 5811 ins_pipe( pipe_slow ); 5812 %} 5813 5814 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5815 predicate(UseAVX > 0); 5816 match(Set dst (SubVB src1 src2)); 5817 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5818 ins_encode %{ 5819 int vlen_enc = vector_length_encoding(this); 5820 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5821 %} 5822 ins_pipe( pipe_slow ); 5823 %} 5824 5825 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5826 predicate((UseAVX > 0) && 5827 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5828 match(Set dst (SubVB src (LoadVector mem))); 5829 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5830 ins_encode %{ 5831 int vlen_enc = vector_length_encoding(this); 5832 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5833 %} 5834 ins_pipe( pipe_slow ); 5835 %} 5836 5837 // Shorts/Chars vector sub 5838 instruct vsubS(vec dst, vec src) %{ 5839 predicate(UseAVX == 0); 5840 match(Set dst (SubVS dst src)); 5841 format %{ "psubw $dst,$src\t! sub packedS" %} 5842 ins_encode %{ 5843 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5844 %} 5845 ins_pipe( pipe_slow ); 5846 %} 5847 5848 5849 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5850 predicate(UseAVX > 0); 5851 match(Set dst (SubVS src1 src2)); 5852 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5853 ins_encode %{ 5854 int vlen_enc = vector_length_encoding(this); 5855 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5856 %} 5857 ins_pipe( pipe_slow ); 5858 %} 5859 5860 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5861 predicate((UseAVX > 0) && 5862 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5863 match(Set dst (SubVS src (LoadVector mem))); 5864 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5865 ins_encode %{ 5866 int vlen_enc = vector_length_encoding(this); 5867 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5868 %} 5869 ins_pipe( pipe_slow ); 5870 %} 5871 5872 // Integers vector sub 5873 instruct vsubI(vec dst, vec src) %{ 5874 predicate(UseAVX == 0); 5875 match(Set dst (SubVI dst src)); 5876 format %{ "psubd $dst,$src\t! sub packedI" %} 5877 ins_encode %{ 5878 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5879 %} 5880 ins_pipe( pipe_slow ); 5881 %} 5882 5883 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5884 predicate(UseAVX > 0); 5885 match(Set dst (SubVI src1 src2)); 5886 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5887 ins_encode %{ 5888 int vlen_enc = vector_length_encoding(this); 5889 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5895 predicate((UseAVX > 0) && 5896 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5897 match(Set dst (SubVI src (LoadVector mem))); 5898 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5899 ins_encode %{ 5900 int vlen_enc = vector_length_encoding(this); 5901 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5902 %} 5903 ins_pipe( pipe_slow ); 5904 %} 5905 5906 // Longs vector sub 5907 instruct vsubL(vec dst, vec src) %{ 5908 predicate(UseAVX == 0); 5909 match(Set dst (SubVL dst src)); 5910 format %{ "psubq $dst,$src\t! sub packedL" %} 5911 ins_encode %{ 5912 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5913 %} 5914 ins_pipe( pipe_slow ); 5915 %} 5916 5917 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5918 predicate(UseAVX > 0); 5919 match(Set dst (SubVL src1 src2)); 5920 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5921 ins_encode %{ 5922 int vlen_enc = vector_length_encoding(this); 5923 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5924 %} 5925 ins_pipe( pipe_slow ); 5926 %} 5927 5928 5929 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5930 predicate((UseAVX > 0) && 5931 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5932 match(Set dst (SubVL src (LoadVector mem))); 5933 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5934 ins_encode %{ 5935 int vlen_enc = vector_length_encoding(this); 5936 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5937 %} 5938 ins_pipe( pipe_slow ); 5939 %} 5940 5941 // Floats vector sub 5942 instruct vsubF(vec dst, vec src) %{ 5943 predicate(UseAVX == 0); 5944 match(Set dst (SubVF dst src)); 5945 format %{ "subps $dst,$src\t! sub packedF" %} 5946 ins_encode %{ 5947 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5948 %} 5949 ins_pipe( pipe_slow ); 5950 %} 5951 5952 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5953 predicate(UseAVX > 0); 5954 match(Set dst (SubVF src1 src2)); 5955 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5956 ins_encode %{ 5957 int vlen_enc = vector_length_encoding(this); 5958 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5959 %} 5960 ins_pipe( pipe_slow ); 5961 %} 5962 5963 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5964 predicate((UseAVX > 0) && 5965 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5966 match(Set dst (SubVF src (LoadVector mem))); 5967 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5968 ins_encode %{ 5969 int vlen_enc = vector_length_encoding(this); 5970 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5971 %} 5972 ins_pipe( pipe_slow ); 5973 %} 5974 5975 // Doubles vector sub 5976 instruct vsubD(vec dst, vec src) %{ 5977 predicate(UseAVX == 0); 5978 match(Set dst (SubVD dst src)); 5979 format %{ "subpd $dst,$src\t! sub packedD" %} 5980 ins_encode %{ 5981 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5982 %} 5983 ins_pipe( pipe_slow ); 5984 %} 5985 5986 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5987 predicate(UseAVX > 0); 5988 match(Set dst (SubVD src1 src2)); 5989 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5990 ins_encode %{ 5991 int vlen_enc = vector_length_encoding(this); 5992 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5993 %} 5994 ins_pipe( pipe_slow ); 5995 %} 5996 5997 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5998 predicate((UseAVX > 0) && 5999 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6000 match(Set dst (SubVD src (LoadVector mem))); 6001 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6002 ins_encode %{ 6003 int vlen_enc = vector_length_encoding(this); 6004 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6005 %} 6006 ins_pipe( pipe_slow ); 6007 %} 6008 6009 // --------------------------------- MUL -------------------------------------- 6010 6011 // Byte vector mul 6012 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6013 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6014 match(Set dst (MulVB src1 src2)); 6015 effect(TEMP dst, TEMP xtmp); 6016 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 6017 ins_encode %{ 6018 assert(UseSSE > 3, "required"); 6019 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 6020 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 6021 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6022 __ psllw($dst$$XMMRegister, 8); 6023 __ psrlw($dst$$XMMRegister, 8); 6024 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6025 %} 6026 ins_pipe( pipe_slow ); 6027 %} 6028 6029 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 6030 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 6031 match(Set dst (MulVB src1 src2)); 6032 effect(TEMP dst, TEMP xtmp); 6033 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 6034 ins_encode %{ 6035 assert(UseSSE > 3, "required"); 6036 // Odd-index elements 6037 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 6038 __ psrlw($dst$$XMMRegister, 8); 6039 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 6040 __ psrlw($xtmp$$XMMRegister, 8); 6041 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 6042 __ psllw($dst$$XMMRegister, 8); 6043 // Even-index elements 6044 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 6045 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 6046 __ psllw($xtmp$$XMMRegister, 8); 6047 __ psrlw($xtmp$$XMMRegister, 8); 6048 // Combine 6049 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 6050 %} 6051 ins_pipe( pipe_slow ); 6052 %} 6053 6054 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 6055 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 6056 match(Set dst (MulVB src1 src2)); 6057 effect(TEMP xtmp1, TEMP xtmp2); 6058 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 6059 ins_encode %{ 6060 int vlen_enc = vector_length_encoding(this); 6061 // Odd-index elements 6062 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 6063 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 6064 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6065 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 6066 // Even-index elements 6067 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6068 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6069 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 6070 // Combine 6071 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 6072 %} 6073 ins_pipe( pipe_slow ); 6074 %} 6075 6076 // Shorts/Chars vector mul 6077 instruct vmulS(vec dst, vec src) %{ 6078 predicate(UseAVX == 0); 6079 match(Set dst (MulVS dst src)); 6080 format %{ "pmullw $dst,$src\t! mul packedS" %} 6081 ins_encode %{ 6082 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 6083 %} 6084 ins_pipe( pipe_slow ); 6085 %} 6086 6087 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 6088 predicate(UseAVX > 0); 6089 match(Set dst (MulVS src1 src2)); 6090 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 6091 ins_encode %{ 6092 int vlen_enc = vector_length_encoding(this); 6093 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6094 %} 6095 ins_pipe( pipe_slow ); 6096 %} 6097 6098 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 6099 predicate((UseAVX > 0) && 6100 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6101 match(Set dst (MulVS src (LoadVector mem))); 6102 format %{ "vpmullw $dst,$src,$mem\t! 
mul packedS" %} 6103 ins_encode %{ 6104 int vlen_enc = vector_length_encoding(this); 6105 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6106 %} 6107 ins_pipe( pipe_slow ); 6108 %} 6109 6110 // Integers vector mul 6111 instruct vmulI(vec dst, vec src) %{ 6112 predicate(UseAVX == 0); 6113 match(Set dst (MulVI dst src)); 6114 format %{ "pmulld $dst,$src\t! mul packedI" %} 6115 ins_encode %{ 6116 assert(UseSSE > 3, "required"); 6117 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6118 %} 6119 ins_pipe( pipe_slow ); 6120 %} 6121 6122 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6123 predicate(UseAVX > 0); 6124 match(Set dst (MulVI src1 src2)); 6125 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6126 ins_encode %{ 6127 int vlen_enc = vector_length_encoding(this); 6128 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6129 %} 6130 ins_pipe( pipe_slow ); 6131 %} 6132 6133 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6134 predicate((UseAVX > 0) && 6135 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6136 match(Set dst (MulVI src (LoadVector mem))); 6137 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6138 ins_encode %{ 6139 int vlen_enc = vector_length_encoding(this); 6140 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6141 %} 6142 ins_pipe( pipe_slow ); 6143 %} 6144 6145 // Longs vector mul 6146 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6147 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6148 VM_Version::supports_avx512dq()) || 6149 VM_Version::supports_avx512vldq()); 6150 match(Set dst (MulVL src1 src2)); 6151 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6152 ins_encode %{ 6153 assert(UseAVX > 2, "required"); 6154 int vlen_enc = vector_length_encoding(this); 6155 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6156 %} 6157 ins_pipe( pipe_slow ); 6158 %} 6159 6160 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6161 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6162 VM_Version::supports_avx512dq()) || 6163 (Matcher::vector_length_in_bytes(n) > 8 && 6164 VM_Version::supports_avx512vldq())); 6165 match(Set dst (MulVL src (LoadVector mem))); 6166 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6167 ins_encode %{ 6168 assert(UseAVX > 2, "required"); 6169 int vlen_enc = vector_length_encoding(this); 6170 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6171 %} 6172 ins_pipe( pipe_slow ); 6173 %} 6174 6175 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6176 predicate(UseAVX == 0); 6177 match(Set dst (MulVL src1 src2)); 6178 effect(TEMP dst, TEMP xtmp); 6179 format %{ "mulVL $dst, $src1, $src2\t! 
using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
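// Illustrative note: the vmulL / vmulL_reg rules above emulate a 64x64->64 bit
// multiply per lane out of 32-bit multiplies, using
//   a * b mod 2^64 = lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
// A minimal standalone C++ sketch of the per-lane arithmetic (the helper name is
// hypothetical and the snippet is not part of this file's generated code):
//
//   #include <cstdint>
//   uint64_t mul64_from_32bit_parts(uint64_t a, uint64_t b) {
//     uint64_t a_lo = (uint32_t)a, a_hi = a >> 32;
//     uint64_t b_lo = (uint32_t)b, b_hi = b >> 32;
//     uint64_t cross = (a_lo * b_hi + a_hi * b_lo) << 32; // "lo-hi" products (pshufd/pmulld/paddd/psllq)
//     return a_lo * b_lo + cross;                         // "lo-lo" product   (pmuludq/paddq)
//   }

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t!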
mul packedD" %} 6269 ins_encode %{ 6270 int vlen_enc = vector_length_encoding(this); 6271 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6272 %} 6273 ins_pipe( pipe_slow ); 6274 %} 6275 6276 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6277 predicate((UseAVX > 0) && 6278 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6279 match(Set dst (MulVD src (LoadVector mem))); 6280 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6281 ins_encode %{ 6282 int vlen_enc = vector_length_encoding(this); 6283 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6284 %} 6285 ins_pipe( pipe_slow ); 6286 %} 6287 6288 // --------------------------------- DIV -------------------------------------- 6289 6290 // Floats vector div 6291 instruct vdivF(vec dst, vec src) %{ 6292 predicate(UseAVX == 0); 6293 match(Set dst (DivVF dst src)); 6294 format %{ "divps $dst,$src\t! div packedF" %} 6295 ins_encode %{ 6296 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6297 %} 6298 ins_pipe( pipe_slow ); 6299 %} 6300 6301 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6302 predicate(UseAVX > 0); 6303 match(Set dst (DivVF src1 src2)); 6304 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6305 ins_encode %{ 6306 int vlen_enc = vector_length_encoding(this); 6307 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6308 %} 6309 ins_pipe( pipe_slow ); 6310 %} 6311 6312 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6313 predicate((UseAVX > 0) && 6314 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6315 match(Set dst (DivVF src (LoadVector mem))); 6316 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6317 ins_encode %{ 6318 int vlen_enc = vector_length_encoding(this); 6319 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6320 %} 6321 ins_pipe( pipe_slow ); 6322 %} 6323 6324 // Doubles vector div 6325 instruct vdivD(vec dst, vec src) %{ 6326 predicate(UseAVX == 0); 6327 match(Set dst (DivVD dst src)); 6328 format %{ "divpd $dst,$src\t! div packedD" %} 6329 ins_encode %{ 6330 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6331 %} 6332 ins_pipe( pipe_slow ); 6333 %} 6334 6335 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6336 predicate(UseAVX > 0); 6337 match(Set dst (DivVD src1 src2)); 6338 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6339 ins_encode %{ 6340 int vlen_enc = vector_length_encoding(this); 6341 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6342 %} 6343 ins_pipe( pipe_slow ); 6344 %} 6345 6346 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6347 predicate((UseAVX > 0) && 6348 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6349 match(Set dst (DivVD src (LoadVector mem))); 6350 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6351 ins_encode %{ 6352 int vlen_enc = vector_length_encoding(this); 6353 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6354 %} 6355 ins_pipe( pipe_slow ); 6356 %} 6357 6358 // ------------------------------ MinMax --------------------------------------- 6359 6360 // Byte, Short, Int vector Min/Max 6361 instruct minmax_reg_sse(vec dst, vec src) %{ 6362 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6363 UseAVX == 0); 6364 match(Set dst (MinV dst src)); 6365 match(Set dst (MaxV dst src)); 6366 format %{ "vector_minmax $dst,$src\t! 
" %} 6367 ins_encode %{ 6368 assert(UseSSE >= 4, "required"); 6369 6370 int opcode = this->ideal_Opcode(); 6371 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6372 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6378 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6379 UseAVX > 0); 6380 match(Set dst (MinV src1 src2)); 6381 match(Set dst (MaxV src1 src2)); 6382 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6383 ins_encode %{ 6384 int opcode = this->ideal_Opcode(); 6385 int vlen_enc = vector_length_encoding(this); 6386 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6387 6388 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6389 %} 6390 ins_pipe( pipe_slow ); 6391 %} 6392 6393 // Long vector Min/Max 6394 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6395 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6396 UseAVX == 0); 6397 match(Set dst (MinV dst src)); 6398 match(Set dst (MaxV src dst)); 6399 effect(TEMP dst, TEMP tmp); 6400 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6401 ins_encode %{ 6402 assert(UseSSE >= 4, "required"); 6403 6404 int opcode = this->ideal_Opcode(); 6405 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6406 assert(elem_bt == T_LONG, "sanity"); 6407 6408 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6409 %} 6410 ins_pipe( pipe_slow ); 6411 %} 6412 6413 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6414 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6415 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6416 match(Set dst (MinV src1 src2)); 6417 match(Set dst (MaxV src1 src2)); 6418 effect(TEMP dst); 6419 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6420 ins_encode %{ 6421 int vlen_enc = vector_length_encoding(this); 6422 int opcode = this->ideal_Opcode(); 6423 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6424 assert(elem_bt == T_LONG, "sanity"); 6425 6426 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6427 %} 6428 ins_pipe( pipe_slow ); 6429 %} 6430 6431 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6432 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6433 Matcher::vector_element_basic_type(n) == T_LONG); 6434 match(Set dst (MinV src1 src2)); 6435 match(Set dst (MaxV src1 src2)); 6436 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6437 ins_encode %{ 6438 assert(UseAVX > 2, "required"); 6439 6440 int vlen_enc = vector_length_encoding(this); 6441 int opcode = this->ideal_Opcode(); 6442 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6443 assert(elem_bt == T_LONG, "sanity"); 6444 6445 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6446 %} 6447 ins_pipe( pipe_slow ); 6448 %} 6449 6450 // Float/Double vector Min/Max 6451 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6452 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6453 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6454 UseAVX > 0); 6455 match(Set dst (MinV a b)); 6456 match(Set dst (MaxV a b)); 6457 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6458 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6459 ins_encode %{ 6460 assert(UseAVX > 0, "required"); 6461 6462 int opcode = this->ideal_Opcode(); 6463 int vlen_enc = vector_length_encoding(this); 6464 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6465 6466 __ vminmax_fp(opcode, elem_bt, 6467 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6468 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6469 %} 6470 ins_pipe( pipe_slow ); 6471 %} 6472 6473 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6474 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6475 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6476 match(Set dst (MinV a b)); 6477 match(Set dst (MaxV a b)); 6478 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6479 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6480 ins_encode %{ 6481 assert(UseAVX > 2, "required"); 6482 6483 int opcode = this->ideal_Opcode(); 6484 int vlen_enc = vector_length_encoding(this); 6485 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6486 6487 __ evminmax_fp(opcode, elem_bt, 6488 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6489 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6490 %} 6491 ins_pipe( pipe_slow ); 6492 %} 6493 6494 // --------------------------------- Signum/CopySign --------------------------- 6495 6496 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6497 match(Set dst (SignumF dst (Binary zero one))); 6498 effect(KILL cr); 6499 format %{ "signumF $dst, $dst" %} 6500 ins_encode %{ 6501 int opcode = this->ideal_Opcode(); 6502 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6503 %} 6504 ins_pipe( pipe_slow ); 6505 %} 6506 6507 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6508 match(Set dst (SignumD dst (Binary zero one))); 6509 effect(KILL cr); 6510 format %{ "signumD $dst, $dst" %} 6511 ins_encode %{ 6512 int opcode = this->ideal_Opcode(); 6513 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6514 %} 6515 ins_pipe( pipe_slow ); 6516 %} 6517 6518 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6519 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6520 match(Set dst (SignumVF src (Binary zero one))); 6521 match(Set dst (SignumVD src (Binary zero one))); 6522 effect(TEMP dst, TEMP xtmp1); 6523 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Reading the Result column from the A=1,B=1,C=1 row (most significant bit) to the
// A=0,B=0,C=0 row (least significant bit) gives 11100100 in binary, i.e. the
// immediate 0xE4. Equivalently, Result = (C & A) | (~C & B): with C = 0x7FFFFFFF
// the non-sign bits are taken from A and the sign bit is taken from B.
// ---------------------------------------

#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}
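// Illustrative note: CompressBits is implemented with the BMI2 pext instruction
// (gather the src bits selected by mask into the low bits of dst) and ExpandBits
// with pdep (scatter the low bits of src to the bit positions set in mask).
// A minimal standalone C++ sketch of the scalar semantics (helper names are
// hypothetical and not part of this file):
//
//   #include <cstdint>
//   uint32_t pext32(uint32_t src, uint32_t mask) {
//     uint32_t dst = 0;
//     for (uint32_t bit = 0, k = 0; bit < 32; bit++) {
//       if (mask & (1u << bit)) {
//         dst |= ((src >> bit) & 1u) << k++;   // compress the selected bit into position k
//       }
//     }
//     return dst;
//   }
//   uint32_t pdep32(uint32_t src, uint32_t mask) {
//     uint32_t dst = 0;
//     for (uint32_t bit = 0, k = 0; bit < 32; bit++) {
//       if (mask & (1u << bit)) {
//         dst |= ((src >> k++) & 1u) << bit;   // deposit the next low bit of src at position 'bit'
//       }
//     }
//     return dst;
//   }

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t!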
parallel bit deposit" %} 6612 ins_encode %{ 6613 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6614 %} 6615 ins_pipe( pipe_slow ); 6616 %} 6617 6618 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6619 predicate(n->bottom_type()->isa_int()); 6620 match(Set dst (CompressBits src (LoadI mask))); 6621 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6622 ins_encode %{ 6623 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6624 %} 6625 ins_pipe( pipe_slow ); 6626 %} 6627 6628 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6629 predicate(n->bottom_type()->isa_int()); 6630 match(Set dst (ExpandBits src (LoadI mask))); 6631 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6632 ins_encode %{ 6633 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6634 %} 6635 ins_pipe( pipe_slow ); 6636 %} 6637 6638 // --------------------------------- Sqrt -------------------------------------- 6639 6640 instruct vsqrtF_reg(vec dst, vec src) %{ 6641 match(Set dst (SqrtVF src)); 6642 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6643 ins_encode %{ 6644 assert(UseAVX > 0, "required"); 6645 int vlen_enc = vector_length_encoding(this); 6646 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6647 %} 6648 ins_pipe( pipe_slow ); 6649 %} 6650 6651 instruct vsqrtF_mem(vec dst, memory mem) %{ 6652 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6653 match(Set dst (SqrtVF (LoadVector mem))); 6654 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6655 ins_encode %{ 6656 assert(UseAVX > 0, "required"); 6657 int vlen_enc = vector_length_encoding(this); 6658 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6659 %} 6660 ins_pipe( pipe_slow ); 6661 %} 6662 6663 // Floating point vector sqrt 6664 instruct vsqrtD_reg(vec dst, vec src) %{ 6665 match(Set dst (SqrtVD src)); 6666 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6667 ins_encode %{ 6668 assert(UseAVX > 0, "required"); 6669 int vlen_enc = vector_length_encoding(this); 6670 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6671 %} 6672 ins_pipe( pipe_slow ); 6673 %} 6674 6675 instruct vsqrtD_mem(vec dst, memory mem) %{ 6676 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6677 match(Set dst (SqrtVD (LoadVector mem))); 6678 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6679 ins_encode %{ 6680 assert(UseAVX > 0, "required"); 6681 int vlen_enc = vector_length_encoding(this); 6682 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6683 %} 6684 ins_pipe( pipe_slow ); 6685 %} 6686 6687 // ------------------------------ Shift --------------------------------------- 6688 6689 // Left and right shift count vectors are the same on x86 6690 // (only lowest bits of xmm reg are used for count). 6691 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6692 match(Set dst (LShiftCntV cnt)); 6693 match(Set dst (RShiftCntV cnt)); 6694 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6695 ins_encode %{ 6696 __ movdl($dst$$XMMRegister, $cnt$$Register); 6697 %} 6698 ins_pipe( pipe_slow ); 6699 %} 6700 6701 // Byte vector shift 6702 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6703 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6704 match(Set dst ( LShiftVB src shift)); 6705 match(Set dst ( RShiftVB src shift)); 6706 match(Set dst (URShiftVB src shift)); 6707 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6708 format %{"vector_byte_shift $dst,$src,$shift" %} 6709 ins_encode %{ 6710 assert(UseSSE > 3, "required"); 6711 int opcode = this->ideal_Opcode(); 6712 bool sign = (opcode != Op_URShiftVB); 6713 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6714 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6715 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6716 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6717 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6718 %} 6719 ins_pipe( pipe_slow ); 6720 %} 6721 6722 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6723 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6724 UseAVX <= 1); 6725 match(Set dst ( LShiftVB src shift)); 6726 match(Set dst ( RShiftVB src shift)); 6727 match(Set dst (URShiftVB src shift)); 6728 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6729 format %{"vector_byte_shift $dst,$src,$shift" %} 6730 ins_encode %{ 6731 assert(UseSSE > 3, "required"); 6732 int opcode = this->ideal_Opcode(); 6733 bool sign = (opcode != Op_URShiftVB); 6734 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6735 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6736 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6737 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6738 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6739 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6740 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6741 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6742 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6743 %} 6744 ins_pipe( pipe_slow ); 6745 %} 6746 6747 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6748 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6749 UseAVX > 1); 6750 match(Set dst ( LShiftVB src shift)); 6751 match(Set dst ( RShiftVB src shift)); 6752 match(Set dst (URShiftVB src shift)); 6753 effect(TEMP dst, TEMP tmp); 6754 format %{"vector_byte_shift $dst,$src,$shift" %} 6755 ins_encode %{ 6756 int opcode = this->ideal_Opcode(); 6757 bool sign = (opcode != Op_URShiftVB); 6758 int vlen_enc = Assembler::AVX_256bit; 6759 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6760 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6761 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6762 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6763 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6764 %} 6765 ins_pipe( pipe_slow ); 6766 %} 6767 6768 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6769 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6770 match(Set dst ( LShiftVB src shift)); 6771 match(Set dst ( RShiftVB src shift)); 6772 match(Set dst (URShiftVB src shift)); 6773 effect(TEMP 
dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result for negative
// data, because Java code converts a short value into an int with sign extension
// before shifting. But char vectors are fine since chars are unsigned values.
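// Illustrative note: a minimal standalone C++ sketch of the mismatch described above
// for >>> on a short lane (helper names are hypothetical and not part of this file):
//
//   #include <cstdint>
//   // Java semantics: the short is sign-extended to int, then shifted as unsigned int.
//   int16_t java_short_urshift(int16_t s, int k) {
//     return (int16_t)((uint32_t)(int32_t)s >> k);
//   }
//   // A packed 16-bit logical shift (psrlw) would compute this instead:
//   int16_t psrlw_lane(int16_t s, int k) {
//     return (int16_t)((uint16_t)s >> k);
//   }
//   // e.g. s = -1, k = 4: java_short_urshift -> -1, psrlw_lane -> 0x0FFF.
// For chars the two agree, because a char value is zero-extended in both cases.

// Shorts/Chars vector left shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t!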
shift packedS" %} 6833 ins_encode %{ 6834 int opcode = this->ideal_Opcode(); 6835 if (UseAVX > 0) { 6836 int vlen_enc = vector_length_encoding(this); 6837 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6838 } else { 6839 int vlen = Matcher::vector_length(this); 6840 if (vlen == 2) { 6841 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6842 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6843 } else if (vlen == 4) { 6844 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6845 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6846 } else { 6847 assert (vlen == 8, "sanity"); 6848 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6849 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6850 } 6851 } 6852 %} 6853 ins_pipe( pipe_slow ); 6854 %} 6855 6856 // Integers vector left shift 6857 instruct vshiftI(vec dst, vec src, vec shift) %{ 6858 predicate(!n->as_ShiftV()->is_var_shift()); 6859 match(Set dst ( LShiftVI src shift)); 6860 match(Set dst ( RShiftVI src shift)); 6861 match(Set dst (URShiftVI src shift)); 6862 effect(TEMP dst, USE src, USE shift); 6863 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6864 ins_encode %{ 6865 int opcode = this->ideal_Opcode(); 6866 if (UseAVX > 0) { 6867 int vlen_enc = vector_length_encoding(this); 6868 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6869 } else { 6870 int vlen = Matcher::vector_length(this); 6871 if (vlen == 2) { 6872 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6873 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6874 } else { 6875 assert(vlen == 4, "sanity"); 6876 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6877 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6878 } 6879 } 6880 %} 6881 ins_pipe( pipe_slow ); 6882 %} 6883 6884 // Integers vector left constant shift 6885 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6886 match(Set dst (LShiftVI src (LShiftCntV shift))); 6887 match(Set dst (RShiftVI src (RShiftCntV shift))); 6888 match(Set dst (URShiftVI src (RShiftCntV shift))); 6889 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6890 ins_encode %{ 6891 int opcode = this->ideal_Opcode(); 6892 if (UseAVX > 0) { 6893 int vector_len = vector_length_encoding(this); 6894 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6895 } else { 6896 int vlen = Matcher::vector_length(this); 6897 if (vlen == 2) { 6898 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6899 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6900 } else { 6901 assert(vlen == 4, "sanity"); 6902 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6903 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6904 } 6905 } 6906 %} 6907 ins_pipe( pipe_slow ); 6908 %} 6909 6910 // Longs vector shift 6911 instruct vshiftL(vec dst, vec src, vec shift) %{ 6912 predicate(!n->as_ShiftV()->is_var_shift()); 6913 match(Set dst ( LShiftVL src shift)); 6914 match(Set dst (URShiftVL src shift)); 6915 effect(TEMP dst, USE src, USE shift); 6916 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6917 ins_encode %{ 6918 int opcode = this->ideal_Opcode(); 6919 if (UseAVX > 0) { 6920 int vlen_enc = vector_length_encoding(this); 6921 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6922 } else { 6923 assert(Matcher::vector_length(this) == 2, ""); 6924 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6925 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6926 } 6927 %} 6928 ins_pipe( pipe_slow ); 6929 %} 6930 6931 // Longs vector constant shift 6932 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6933 match(Set dst (LShiftVL src (LShiftCntV shift))); 6934 match(Set dst (URShiftVL src (RShiftCntV shift))); 6935 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6936 ins_encode %{ 6937 int opcode = this->ideal_Opcode(); 6938 if (UseAVX > 0) { 6939 int vector_len = vector_length_encoding(this); 6940 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6941 } else { 6942 assert(Matcher::vector_length(this) == 2, ""); 6943 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6944 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6945 } 6946 %} 6947 ins_pipe( pipe_slow ); 6948 %} 6949 6950 // -------------------ArithmeticRightShift ----------------------------------- 6951 // Long vector arithmetic right shift 6952 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6953 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6954 match(Set dst (RShiftVL src shift)); 6955 effect(TEMP dst, TEMP tmp); 6956 format %{ "vshiftq $dst,$src,$shift" %} 6957 ins_encode %{ 6958 uint vlen = Matcher::vector_length(this); 6959 if (vlen == 2) { 6960 assert(UseSSE >= 2, "required"); 6961 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6962 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6963 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6964 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6965 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6966 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6967 } else { 6968 assert(vlen == 4, "sanity"); 6969 assert(UseAVX > 1, "required"); 6970 int vlen_enc = Assembler::AVX_256bit; 6971 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6972 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6973 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6974 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6975 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6976 } 6977 %} 6978 ins_pipe( pipe_slow ); 6979 %} 6980 6981 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6982 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6983 match(Set dst (RShiftVL src shift)); 6984 format %{ "vshiftq $dst,$src,$shift" %} 6985 ins_encode %{ 6986 int vlen_enc = vector_length_encoding(this); 6987 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6988 %} 6989 ins_pipe( pipe_slow ); 6990 %} 6991 6992 // ------------------- Variable Shift ----------------------------- 6993 // Byte variable shift 6994 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6995 predicate(Matcher::vector_length(n) <= 8 && 6996 n->as_ShiftV()->is_var_shift() && 6997 !VM_Version::supports_avx512bw()); 6998 match(Set dst ( LShiftVB src shift)); 6999 match(Set dst ( RShiftVB src shift)); 7000 match(Set dst (URShiftVB src shift)); 
7001 effect(TEMP dst, TEMP vtmp); 7002 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7003 ins_encode %{ 7004 assert(UseAVX >= 2, "required"); 7005 7006 int opcode = this->ideal_Opcode(); 7007 int vlen_enc = Assembler::AVX_128bit; 7008 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7009 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7010 %} 7011 ins_pipe( pipe_slow ); 7012 %} 7013 7014 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7015 predicate(Matcher::vector_length(n) == 16 && 7016 n->as_ShiftV()->is_var_shift() && 7017 !VM_Version::supports_avx512bw()); 7018 match(Set dst ( LShiftVB src shift)); 7019 match(Set dst ( RShiftVB src shift)); 7020 match(Set dst (URShiftVB src shift)); 7021 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7022 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7023 ins_encode %{ 7024 assert(UseAVX >= 2, "required"); 7025 7026 int opcode = this->ideal_Opcode(); 7027 int vlen_enc = Assembler::AVX_128bit; 7028 // Shift lower half and get word result in dst 7029 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7030 7031 // Shift upper half and get word result in vtmp1 7032 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7033 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7034 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7035 7036 // Merge and down convert the two word results to byte in dst 7037 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7038 %} 7039 ins_pipe( pipe_slow ); 7040 %} 7041 7042 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 7043 predicate(Matcher::vector_length(n) == 32 && 7044 n->as_ShiftV()->is_var_shift() && 7045 !VM_Version::supports_avx512bw()); 7046 match(Set dst ( LShiftVB src shift)); 7047 match(Set dst ( RShiftVB src shift)); 7048 match(Set dst (URShiftVB src shift)); 7049 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 7050 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 7051 ins_encode %{ 7052 assert(UseAVX >= 2, "required"); 7053 7054 int opcode = this->ideal_Opcode(); 7055 int vlen_enc = Assembler::AVX_128bit; 7056 // Process lower 128 bits and get result in dst 7057 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7058 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 7059 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 7060 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7061 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 7062 7063 // Process higher 128 bits and get result in vtmp3 7064 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7065 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7066 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 7067 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 7068 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 7069 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7070 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 7071 7072 // Merge the two results in dst 7073 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7074 %} 7075 ins_pipe( pipe_slow ); 7076 %} 7077 7078 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 7079 predicate(Matcher::vector_length(n) <= 32 && 7080 n->as_ShiftV()->is_var_shift() && 7081 VM_Version::supports_avx512bw()); 7082 match(Set dst ( LShiftVB src shift)); 7083 match(Set dst ( RShiftVB src shift)); 7084 match(Set dst (URShiftVB src shift)); 7085 effect(TEMP dst, TEMP vtmp); 7086 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 7087 ins_encode %{ 7088 assert(UseAVX > 2, "required"); 7089 7090 int opcode = this->ideal_Opcode(); 7091 int vlen_enc = vector_length_encoding(this); 7092 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 7093 %} 7094 ins_pipe( pipe_slow ); 7095 %} 7096 7097 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7098 predicate(Matcher::vector_length(n) == 64 && 7099 n->as_ShiftV()->is_var_shift() && 7100 VM_Version::supports_avx512bw()); 7101 match(Set dst ( LShiftVB src shift)); 7102 match(Set dst ( RShiftVB src shift)); 7103 match(Set dst (URShiftVB src shift)); 7104 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7105 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 7106 ins_encode %{ 7107 assert(UseAVX > 2, "required"); 7108 7109 int opcode = this->ideal_Opcode(); 7110 int vlen_enc = Assembler::AVX_256bit; 7111 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 7112 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 7113 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 7114 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 7115 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 7116 %} 7117 ins_pipe( pipe_slow ); 7118 %} 7119 7120 // Short variable shift 7121 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 7122 predicate(Matcher::vector_length(n) <= 8 && 7123 n->as_ShiftV()->is_var_shift() && 7124 !VM_Version::supports_avx512bw()); 7125 match(Set dst ( LShiftVS src shift)); 7126 match(Set dst ( RShiftVS src shift)); 7127 match(Set dst (URShiftVS src shift)); 7128 effect(TEMP dst, TEMP vtmp); 7129 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7130 ins_encode %{ 7131 assert(UseAVX >= 2, "required"); 7132 7133 int opcode = this->ideal_Opcode(); 7134 bool sign = (opcode != Op_URShiftVS); 7135 int vlen_enc = Assembler::AVX_256bit; 7136 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 7137 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 7138 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 7139 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7140 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 7141 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7142 %} 7143 ins_pipe( pipe_slow ); 7144 %} 7145 7146 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 7147 predicate(Matcher::vector_length(n) == 16 && 
7148 n->as_ShiftV()->is_var_shift() && 7149 !VM_Version::supports_avx512bw()); 7150 match(Set dst ( LShiftVS src shift)); 7151 match(Set dst ( RShiftVS src shift)); 7152 match(Set dst (URShiftVS src shift)); 7153 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 7154 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 7155 ins_encode %{ 7156 assert(UseAVX >= 2, "required"); 7157 7158 int opcode = this->ideal_Opcode(); 7159 bool sign = (opcode != Op_URShiftVS); 7160 int vlen_enc = Assembler::AVX_256bit; 7161 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 7162 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 7163 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7164 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7165 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7166 7167 // Shift upper half, with result in dst using vtmp1 as TEMP 7168 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 7169 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 7170 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7171 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7172 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 7173 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7174 7175 // Merge lower and upper half result into dst 7176 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7177 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 7178 %} 7179 ins_pipe( pipe_slow ); 7180 %} 7181 7182 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 7183 predicate(n->as_ShiftV()->is_var_shift() && 7184 VM_Version::supports_avx512bw()); 7185 match(Set dst ( LShiftVS src shift)); 7186 match(Set dst ( RShiftVS src shift)); 7187 match(Set dst (URShiftVS src shift)); 7188 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 7189 ins_encode %{ 7190 assert(UseAVX > 2, "required"); 7191 7192 int opcode = this->ideal_Opcode(); 7193 int vlen_enc = vector_length_encoding(this); 7194 if (!VM_Version::supports_avx512vl()) { 7195 vlen_enc = Assembler::AVX_512bit; 7196 } 7197 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7198 %} 7199 ins_pipe( pipe_slow ); 7200 %} 7201 7202 //Integer variable shift 7203 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 7204 predicate(n->as_ShiftV()->is_var_shift()); 7205 match(Set dst ( LShiftVI src shift)); 7206 match(Set dst ( RShiftVI src shift)); 7207 match(Set dst (URShiftVI src shift)); 7208 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 7209 ins_encode %{ 7210 assert(UseAVX >= 2, "required"); 7211 7212 int opcode = this->ideal_Opcode(); 7213 int vlen_enc = vector_length_encoding(this); 7214 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7215 %} 7216 ins_pipe( pipe_slow ); 7217 %} 7218 7219 //Long variable shift 7220 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 7221 predicate(n->as_ShiftV()->is_var_shift()); 7222 match(Set dst ( LShiftVL src shift)); 7223 match(Set dst (URShiftVL src shift)); 7224 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 7225 ins_encode %{ 7226 assert(UseAVX >= 2, "required"); 7227 7228 int opcode = this->ideal_Opcode(); 7229 int vlen_enc = vector_length_encoding(this); 7230 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7231 %} 7232 ins_pipe( pipe_slow ); 7233 %} 7234 7235 //Long variable right shift arithmetic 7236 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 7237 predicate(Matcher::vector_length(n) <= 4 && 7238 n->as_ShiftV()->is_var_shift() && 7239 UseAVX == 2); 7240 match(Set dst (RShiftVL src shift)); 7241 effect(TEMP dst, TEMP vtmp); 7242 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 7243 ins_encode %{ 7244 int opcode = this->ideal_Opcode(); 7245 int vlen_enc = vector_length_encoding(this); 7246 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7247 $vtmp$$XMMRegister); 7248 %} 7249 ins_pipe( pipe_slow ); 7250 %} 7251 7252 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7253 predicate(n->as_ShiftV()->is_var_shift() && 7254 UseAVX > 2); 7255 match(Set dst (RShiftVL src shift)); 7256 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 7257 ins_encode %{ 7258 int opcode = this->ideal_Opcode(); 7259 int vlen_enc = vector_length_encoding(this); 7260 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7261 %} 7262 ins_pipe( pipe_slow ); 7263 %} 7264 7265 // --------------------------------- AND -------------------------------------- 7266 7267 instruct vand(vec dst, vec src) %{ 7268 predicate(UseAVX == 0); 7269 match(Set dst (AndV dst src)); 7270 format %{ "pand $dst,$src\t! and vectors" %} 7271 ins_encode %{ 7272 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7273 %} 7274 ins_pipe( pipe_slow ); 7275 %} 7276 7277 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7278 predicate(UseAVX > 0); 7279 match(Set dst (AndV src1 src2)); 7280 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7281 ins_encode %{ 7282 int vlen_enc = vector_length_encoding(this); 7283 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7284 %} 7285 ins_pipe( pipe_slow ); 7286 %} 7287 7288 instruct vand_mem(vec dst, vec src, memory mem) %{ 7289 predicate((UseAVX > 0) && 7290 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7291 match(Set dst (AndV src (LoadVector mem))); 7292 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7293 ins_encode %{ 7294 int vlen_enc = vector_length_encoding(this); 7295 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7296 %} 7297 ins_pipe( pipe_slow ); 7298 %} 7299 7300 // --------------------------------- OR --------------------------------------- 7301 7302 instruct vor(vec dst, vec src) %{ 7303 predicate(UseAVX == 0); 7304 match(Set dst (OrV dst src)); 7305 format %{ "por $dst,$src\t! or vectors" %} 7306 ins_encode %{ 7307 __ por($dst$$XMMRegister, $src$$XMMRegister); 7308 %} 7309 ins_pipe( pipe_slow ); 7310 %} 7311 7312 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7313 predicate(UseAVX > 0); 7314 match(Set dst (OrV src1 src2)); 7315 format %{ "vpor $dst,$src1,$src2\t!
or vectors" %} 7316 ins_encode %{ 7317 int vlen_enc = vector_length_encoding(this); 7318 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7319 %} 7320 ins_pipe( pipe_slow ); 7321 %} 7322 7323 instruct vor_mem(vec dst, vec src, memory mem) %{ 7324 predicate((UseAVX > 0) && 7325 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7326 match(Set dst (OrV src (LoadVector mem))); 7327 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7328 ins_encode %{ 7329 int vlen_enc = vector_length_encoding(this); 7330 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7331 %} 7332 ins_pipe( pipe_slow ); 7333 %} 7334 7335 // --------------------------------- XOR -------------------------------------- 7336 7337 instruct vxor(vec dst, vec src) %{ 7338 predicate(UseAVX == 0); 7339 match(Set dst (XorV dst src)); 7340 format %{ "pxor $dst,$src\t! xor vectors" %} 7341 ins_encode %{ 7342 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7343 %} 7344 ins_pipe( pipe_slow ); 7345 %} 7346 7347 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7348 predicate(UseAVX > 0); 7349 match(Set dst (XorV src1 src2)); 7350 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7351 ins_encode %{ 7352 int vlen_enc = vector_length_encoding(this); 7353 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7354 %} 7355 ins_pipe( pipe_slow ); 7356 %} 7357 7358 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7359 predicate((UseAVX > 0) && 7360 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7361 match(Set dst (XorV src (LoadVector mem))); 7362 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7363 ins_encode %{ 7364 int vlen_enc = vector_length_encoding(this); 7365 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7366 %} 7367 ins_pipe( pipe_slow ); 7368 %} 7369 7370 // --------------------------------- VectorCast -------------------------------------- 7371 7372 instruct vcastBtoX(vec dst, vec src) %{ 7373 predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE); 7374 match(Set dst (VectorCastB2X src)); 7375 format %{ "vector_cast_b2x $dst,$src\t!" %} 7376 ins_encode %{ 7377 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7378 int vlen_enc = vector_length_encoding(this); 7379 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7380 %} 7381 ins_pipe( pipe_slow ); 7382 %} 7383 7384 instruct vcastBtoD(legVec dst, legVec src) %{ 7385 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE); 7386 match(Set dst (VectorCastB2X src)); 7387 format %{ "vector_cast_b2x $dst,$src\t!" 
%} 7388 ins_encode %{ 7389 int vlen_enc = vector_length_encoding(this); 7390 __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7395 instruct castStoX(vec dst, vec src) %{ 7396 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7397 Matcher::vector_length(n->in(1)) <= 8 && // src 7398 Matcher::vector_element_basic_type(n) == T_BYTE); 7399 match(Set dst (VectorCastS2X src)); 7400 format %{ "vector_cast_s2x $dst,$src" %} 7401 ins_encode %{ 7402 assert(UseAVX > 0, "required"); 7403 7404 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7405 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7406 %} 7407 ins_pipe( pipe_slow ); 7408 %} 7409 7410 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7411 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7412 Matcher::vector_length(n->in(1)) == 16 && // src 7413 Matcher::vector_element_basic_type(n) == T_BYTE); 7414 effect(TEMP dst, TEMP vtmp); 7415 match(Set dst (VectorCastS2X src)); 7416 format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %} 7417 ins_encode %{ 7418 assert(UseAVX > 0, "required"); 7419 7420 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7421 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7422 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7423 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7424 %} 7425 ins_pipe( pipe_slow ); 7426 %} 7427 7428 instruct vcastStoX_evex(vec dst, vec src) %{ 7429 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7430 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7431 match(Set dst (VectorCastS2X src)); 7432 format %{ "vector_cast_s2x $dst,$src\t!" %} 7433 ins_encode %{ 7434 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7435 int src_vlen_enc = vector_length_encoding(this, $src); 7436 int vlen_enc = vector_length_encoding(this); 7437 switch (to_elem_bt) { 7438 case T_BYTE: 7439 if (!VM_Version::supports_avx512vl()) { 7440 vlen_enc = Assembler::AVX_512bit; 7441 } 7442 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7443 break; 7444 case T_INT: 7445 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7446 break; 7447 case T_FLOAT: 7448 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7449 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7450 break; 7451 case T_LONG: 7452 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7453 break; 7454 case T_DOUBLE: { 7455 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? 
Assembler::AVX_256bit : Assembler::AVX_128bit; 7456 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7457 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7458 break; 7459 } 7460 default: 7461 ShouldNotReachHere(); 7462 } 7463 %} 7464 ins_pipe( pipe_slow ); 7465 %} 7466 7467 instruct castItoX(vec dst, vec src) %{ 7468 predicate(UseAVX <= 2 && 7469 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7470 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7471 match(Set dst (VectorCastI2X src)); 7472 format %{ "vector_cast_i2x $dst,$src" %} 7473 ins_encode %{ 7474 assert(UseAVX > 0, "required"); 7475 7476 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7477 int vlen_enc = vector_length_encoding(this, $src); 7478 7479 if (to_elem_bt == T_BYTE) { 7480 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7481 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7482 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7483 } else { 7484 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7485 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7486 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7487 } 7488 %} 7489 ins_pipe( pipe_slow ); 7490 %} 7491 7492 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7493 predicate(UseAVX <= 2 && 7494 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7495 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7496 match(Set dst (VectorCastI2X src)); 7497 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %} 7498 effect(TEMP dst, TEMP vtmp); 7499 ins_encode %{ 7500 assert(UseAVX > 0, "required"); 7501 7502 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7503 int vlen_enc = vector_length_encoding(this, $src); 7504 7505 if (to_elem_bt == T_BYTE) { 7506 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7507 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7508 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7509 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7510 } else { 7511 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7512 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7513 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7514 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7515 } 7516 %} 7517 ins_pipe( pipe_slow ); 7518 %} 7519 7520 instruct vcastItoX_evex(vec dst, vec src) %{ 7521 predicate(UseAVX > 2 || 7522 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7523 match(Set dst (VectorCastI2X src)); 7524 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7525 ins_encode %{ 7526 assert(UseAVX > 0, "required"); 7527 7528 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7529 int src_vlen_enc = vector_length_encoding(this, $src); 7530 int dst_vlen_enc = vector_length_encoding(this); 7531 switch (dst_elem_bt) { 7532 case T_BYTE: 7533 if (!VM_Version::supports_avx512vl()) { 7534 src_vlen_enc = Assembler::AVX_512bit; 7535 } 7536 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7537 break; 7538 case T_SHORT: 7539 if (!VM_Version::supports_avx512vl()) { 7540 src_vlen_enc = Assembler::AVX_512bit; 7541 } 7542 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7543 break; 7544 case T_FLOAT: 7545 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7546 break; 7547 case T_LONG: 7548 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7549 break; 7550 case T_DOUBLE: 7551 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7552 break; 7553 default: 7554 ShouldNotReachHere(); 7555 } 7556 %} 7557 ins_pipe( pipe_slow ); 7558 %} 7559 7560 instruct vcastLtoBS(vec dst, vec src) %{ 7561 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7562 UseAVX <= 2); 7563 match(Set dst (VectorCastL2X src)); 7564 format %{ "vector_cast_l2x $dst,$src" %} 7565 ins_encode %{ 7566 assert(UseAVX > 0, "required"); 7567 7568 int vlen = Matcher::vector_length_in_bytes(this, $src); 7569 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7570 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7571 : ExternalAddress(vector_int_to_short_mask()); 7572 if (vlen <= 16) { 7573 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7574 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7575 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7576 } else { 7577 assert(vlen <= 32, "required"); 7578 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7579 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7580 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7581 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7582 } 7583 if (to_elem_bt == T_BYTE) { 7584 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7585 } 7586 %} 7587 ins_pipe( pipe_slow ); 7588 %} 7589 7590 instruct vcastLtoX_evex(vec dst, vec src) %{ 7591 predicate(UseAVX > 2 || 7592 (Matcher::vector_element_basic_type(n) == T_INT || 7593 Matcher::vector_element_basic_type(n) == T_FLOAT || 7594 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7595 match(Set dst (VectorCastL2X src)); 7596 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7597 ins_encode %{ 7598 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7599 int vlen = Matcher::vector_length_in_bytes(this, $src); 7600 int vlen_enc = vector_length_encoding(this, $src); 7601 switch (to_elem_bt) { 7602 case T_BYTE: 7603 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7604 vlen_enc = Assembler::AVX_512bit; 7605 } 7606 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7607 break; 7608 case T_SHORT: 7609 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7610 vlen_enc = Assembler::AVX_512bit; 7611 } 7612 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7613 break; 7614 case T_INT: 7615 if (vlen == 8) { 7616 if ($dst$$XMMRegister != $src$$XMMRegister) { 7617 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7618 } 7619 } else if (vlen == 16) { 7620 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7621 } else if (vlen == 32) { 7622 if (UseAVX > 2) { 7623 if (!VM_Version::supports_avx512vl()) { 7624 vlen_enc = Assembler::AVX_512bit; 7625 } 7626 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7627 } else { 7628 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7629 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7630 } 7631 } else { // vlen == 64 7632 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7633 } 7634 break; 7635 case T_FLOAT: 7636 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7637 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7638 break; 7639 case T_DOUBLE: 7640 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7641 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7642 break; 7643 7644 default: assert(false, "%s", type2name(to_elem_bt)); 7645 } 7646 %} 7647 ins_pipe( pipe_slow ); 7648 %} 7649 7650 instruct vcastFtoD_reg(vec dst, vec src) %{ 7651 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7652 match(Set dst (VectorCastF2X src)); 7653 format %{ "vector_cast_f2d $dst,$src\t!" %} 7654 ins_encode %{ 7655 int vlen_enc = vector_length_encoding(this); 7656 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7657 %} 7658 ins_pipe( pipe_slow ); 7659 %} 7660 7661 7662 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7663 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7664 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7665 match(Set dst (VectorCastF2X src)); 7666 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7667 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7668 ins_encode %{ 7669 int vlen_enc = vector_length_encoding(this, $src); 7670 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7671 // JDK-8292878 removed the need for an explicit scratch register when loading addresses wider than 7672 // 32 bits for the register-indirect addressing mode, since stub constants live in the code cache 7673 // and ReservedCodeCacheSize is currently capped at 2G. 7674 // Targets are free to raise this limit, but a code cache larger than 2G looks unreasonable 7675 // in practical scenarios. On the other hand, with the given cap we save a temporary 7676 // register allocation, which in the limiting case can prevent 7677 // spilling in blocks with high register pressure.
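// The helper below emits the conversion together with the fixup required by Java's
// float-to-integral cast semantics (e.g. NaN lanes become 0); it uses the sign-flip constant and
// the four XMM temporaries, and noreg is passed for the scratch-register slot as explained above.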
7678 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7679 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7680 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7681 %} 7682 ins_pipe( pipe_slow ); 7683 %} 7684 7685 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7686 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7687 is_integral_type(Matcher::vector_element_basic_type(n))); 7688 match(Set dst (VectorCastF2X src)); 7689 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7690 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7691 ins_encode %{ 7692 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7693 if (to_elem_bt == T_LONG) { 7694 int vlen_enc = vector_length_encoding(this); 7695 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7696 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7697 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7698 } else { 7699 int vlen_enc = vector_length_encoding(this, $src); 7700 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7701 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7702 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7703 } 7704 %} 7705 ins_pipe( pipe_slow ); 7706 %} 7707 7708 instruct vcastDtoF_reg(vec dst, vec src) %{ 7709 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7710 match(Set dst (VectorCastD2X src)); 7711 format %{ "vector_cast_d2x $dst,$src\t!" %} 7712 ins_encode %{ 7713 int vlen_enc = vector_length_encoding(this, $src); 7714 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7715 %} 7716 ins_pipe( pipe_slow ); 7717 %} 7718 7719 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7720 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7721 is_integral_type(Matcher::vector_element_basic_type(n))); 7722 match(Set dst (VectorCastD2X src)); 7723 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7724 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7725 ins_encode %{ 7726 int vlen_enc = vector_length_encoding(this, $src); 7727 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7728 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7729 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7730 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7731 %} 7732 ins_pipe( pipe_slow ); 7733 %} 7734 7735 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7736 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7737 is_integral_type(Matcher::vector_element_basic_type(n))); 7738 match(Set dst (VectorCastD2X src)); 7739 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7740 format %{ "vector_cast_d2x $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7741 ins_encode %{ 7742 int vlen_enc = vector_length_encoding(this, $src); 7743 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7744 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7745 ExternalAddress(vector_float_signflip()); 7746 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7747 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7748 %} 7749 ins_pipe( pipe_slow ); 7750 %} 7751 7752 instruct vucast(vec dst, vec src) %{ 7753 match(Set dst (VectorUCastB2X src)); 7754 match(Set dst (VectorUCastS2X src)); 7755 match(Set dst (VectorUCastI2X src)); 7756 format %{ "vector_ucast $dst,$src\t!" %} 7757 ins_encode %{ 7758 assert(UseAVX > 0, "required"); 7759 7760 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7761 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7762 int vlen_enc = vector_length_encoding(this); 7763 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7764 %} 7765 ins_pipe( pipe_slow ); 7766 %} 7767 7768 #ifdef _LP64 7769 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7770 predicate(!VM_Version::supports_avx512vl() && 7771 Matcher::vector_length_in_bytes(n) < 64 && 7772 Matcher::vector_element_basic_type(n) == T_INT); 7773 match(Set dst (RoundVF src)); 7774 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7775 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7776 ins_encode %{ 7777 int vlen_enc = vector_length_encoding(this); 7778 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7779 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7780 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7781 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7782 %} 7783 ins_pipe( pipe_slow ); 7784 %} 7785 7786 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7787 predicate((VM_Version::supports_avx512vl() || 7788 Matcher::vector_length_in_bytes(n) == 64) && 7789 Matcher::vector_element_basic_type(n) == T_INT); 7790 match(Set dst (RoundVF src)); 7791 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7792 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7793 ins_encode %{ 7794 int vlen_enc = vector_length_encoding(this); 7795 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 
0x3FBF : 0x3F80)); 7796 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7797 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7798 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7799 %} 7800 ins_pipe( pipe_slow ); 7801 %} 7802 7803 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7804 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7805 match(Set dst (RoundVD src)); 7806 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7807 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7808 ins_encode %{ 7809 int vlen_enc = vector_length_encoding(this); 7810 InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80)); 7811 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7812 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7813 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7814 %} 7815 ins_pipe( pipe_slow ); 7816 %} 7817 7818 #endif // _LP64 7819 7820 // --------------------------------- VectorMaskCmp -------------------------------------- 7821 7822 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7823 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7824 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7825 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7826 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7827 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7828 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7829 ins_encode %{ 7830 int vlen_enc = vector_length_encoding(this, $src1); 7831 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7832 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7833 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7834 } else { 7835 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7836 } 7837 %} 7838 ins_pipe( pipe_slow ); 7839 %} 7840 7841 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7842 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7843 n->bottom_type()->isa_vectmask() == nullptr && 7844 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7845 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7846 effect(TEMP ktmp); 7847 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7848 ins_encode %{ 7849 int vlen_enc = Assembler::AVX_512bit; 7850 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7851 KRegister mask = k0; // The comparison itself is not being masked. 
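// Compare into the temporary mask register, then expand the result into $dst: lanes whose mask
// bit is set are filled with the all-bits-set pattern, the remaining lanes are zeroed
// (the 'false' argument to the masked move selects zero-masking).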
7852 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7853 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7854 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7855 } else { 7856 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7857 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7858 } 7859 %} 7860 ins_pipe( pipe_slow ); 7861 %} 7862 7863 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7864 predicate(n->bottom_type()->isa_vectmask() && 7865 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7866 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7867 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7868 ins_encode %{ 7869 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7870 int vlen_enc = vector_length_encoding(this, $src1); 7871 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7872 KRegister mask = k0; // The comparison itself is not being masked. 7873 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7874 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7875 } else { 7876 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7877 } 7878 %} 7879 ins_pipe( pipe_slow ); 7880 %} 7881 7882 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7883 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7884 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7885 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7886 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7887 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7888 (n->in(2)->get_int() == BoolTest::eq || 7889 n->in(2)->get_int() == BoolTest::lt || 7890 n->in(2)->get_int() == BoolTest::gt)); // cond 7891 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7892 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %} 7893 ins_encode %{ 7894 int vlen_enc = vector_length_encoding(this, $src1); 7895 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7896 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7897 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7898 %} 7899 ins_pipe( pipe_slow ); 7900 %} 7901 7902 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7903 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7904 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7905 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7906 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7907 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7908 (n->in(2)->get_int() == BoolTest::ne || 7909 n->in(2)->get_int() == BoolTest::le || 7910 n->in(2)->get_int() == BoolTest::ge)); // cond 7911 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7912 effect(TEMP dst, TEMP xtmp); 7913 format %{ "vector_compare $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7914 ins_encode %{ 7915 int vlen_enc = vector_length_encoding(this, $src1); 7916 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7917 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7918 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7919 %} 7920 ins_pipe( pipe_slow ); 7921 %} 7922 7923 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7924 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7925 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7926 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7927 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7928 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7929 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7930 effect(TEMP dst, TEMP xtmp); 7931 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7932 ins_encode %{ 7933 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7934 int vlen_enc = vector_length_encoding(this, $src1); 7935 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7936 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7937 7938 if (vlen_enc == Assembler::AVX_128bit) { 7939 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7940 } else { 7941 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7942 } 7943 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7944 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7945 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7946 %} 7947 ins_pipe( pipe_slow ); 7948 %} 7949 7950 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7951 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7952 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7953 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7954 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7955 effect(TEMP ktmp); 7956 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7957 ins_encode %{ 7958 assert(UseAVX > 2, "required"); 7959 7960 int vlen_enc = vector_length_encoding(this, $src1); 7961 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7962 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7963 KRegister mask = k0; // The comparison itself is not being masked. 
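// merge == false below selects zero-masking for the masked move, so $dst ends up with all-ones
// in lanes that satisfied the comparison and zeroes everywhere else.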
7964 bool merge = false; 7965 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7966 7967 switch (src1_elem_bt) { 7968 case T_INT: { 7969 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7970 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7971 break; 7972 } 7973 case T_LONG: { 7974 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7975 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7976 break; 7977 } 7978 default: assert(false, "%s", type2name(src1_elem_bt)); 7979 } 7980 %} 7981 ins_pipe( pipe_slow ); 7982 %} 7983 7984 7985 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7986 predicate(n->bottom_type()->isa_vectmask() && 7987 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7988 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7989 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %} 7990 ins_encode %{ 7991 assert(UseAVX > 2, "required"); 7992 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7993 7994 int vlen_enc = vector_length_encoding(this, $src1); 7995 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7996 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7997 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7998 7999 // Comparison i 8000 switch (src1_elem_bt) { 8001 case T_BYTE: { 8002 __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8003 break; 8004 } 8005 case T_SHORT: { 8006 __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8007 break; 8008 } 8009 case T_INT: { 8010 __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8011 break; 8012 } 8013 case T_LONG: { 8014 __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 8015 break; 8016 } 8017 default: assert(false, "%s", type2name(src1_elem_bt)); 8018 } 8019 %} 8020 ins_pipe( pipe_slow ); 8021 %} 8022 8023 // Extract 8024 8025 instruct extractI(rRegI dst, legVec src, immU8 idx) %{ 8026 predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src 8027 match(Set dst (ExtractI src idx)); 8028 match(Set dst (ExtractS src idx)); 8029 #ifdef _LP64 8030 match(Set dst (ExtractB src idx)); 8031 #endif 8032 format %{ "extractI $dst,$src,$idx\t!" %} 8033 ins_encode %{ 8034 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8035 8036 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8037 __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8038 %} 8039 ins_pipe( pipe_slow ); 8040 %} 8041 8042 instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{ 8043 predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src 8044 Matcher::vector_length_in_bytes(n->in(1)) == 64); // src 8045 match(Set dst (ExtractI src idx)); 8046 match(Set dst (ExtractS src idx)); 8047 #ifdef _LP64 8048 match(Set dst (ExtractB src idx)); 8049 #endif 8050 effect(TEMP vtmp); 8051 format %{ "vextractI $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8052 ins_encode %{ 8053 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8054 8055 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 8056 XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8057 __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant); 8058 %} 8059 ins_pipe( pipe_slow ); 8060 %} 8061 8062 #ifdef _LP64 8063 instruct extractL(rRegL dst, legVec src, immU8 idx) %{ 8064 predicate(Matcher::vector_length(n->in(1)) <= 2); // src 8065 match(Set dst (ExtractL src idx)); 8066 format %{ "extractL $dst,$src,$idx\t!" %} 8067 ins_encode %{ 8068 assert(UseSSE >= 4, "required"); 8069 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8070 8071 __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant); 8072 %} 8073 ins_pipe( pipe_slow ); 8074 %} 8075 8076 instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{ 8077 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8078 Matcher::vector_length(n->in(1)) == 8); // src 8079 match(Set dst (ExtractL src idx)); 8080 effect(TEMP vtmp); 8081 format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %} 8082 ins_encode %{ 8083 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8084 8085 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8086 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 8087 %} 8088 ins_pipe( pipe_slow ); 8089 %} 8090 #endif 8091 8092 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8093 predicate(Matcher::vector_length(n->in(1)) <= 4); 8094 match(Set dst (ExtractF src idx)); 8095 effect(TEMP dst, TEMP vtmp); 8096 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8097 ins_encode %{ 8098 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8099 8100 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 8101 %} 8102 ins_pipe( pipe_slow ); 8103 %} 8104 8105 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 8106 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 8107 Matcher::vector_length(n->in(1)/*src*/) == 16); 8108 match(Set dst (ExtractF src idx)); 8109 effect(TEMP vtmp); 8110 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 8111 ins_encode %{ 8112 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8113 8114 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8115 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 8116 %} 8117 ins_pipe( pipe_slow ); 8118 %} 8119 8120 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 8121 predicate(Matcher::vector_length(n->in(1)) == 2); // src 8122 match(Set dst (ExtractD src idx)); 8123 format %{ "extractD $dst,$src,$idx\t!" %} 8124 ins_encode %{ 8125 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8126 8127 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8128 %} 8129 ins_pipe( pipe_slow ); 8130 %} 8131 8132 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 8133 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 8134 Matcher::vector_length(n->in(1)) == 8); // src 8135 match(Set dst (ExtractD src idx)); 8136 effect(TEMP vtmp); 8137 format %{ "vextractD $dst,$src,$idx\t! 
using $vtmp as TEMP" %} 8138 ins_encode %{ 8139 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 8140 8141 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 8142 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 8143 %} 8144 ins_pipe( pipe_slow ); 8145 %} 8146 8147 // --------------------------------- Vector Blend -------------------------------------- 8148 8149 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 8150 predicate(UseAVX == 0); 8151 match(Set dst (VectorBlend (Binary dst src) mask)); 8152 format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %} 8153 effect(TEMP tmp); 8154 ins_encode %{ 8155 assert(UseSSE >= 4, "required"); 8156 8157 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 8158 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 8159 } 8160 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 8161 %} 8162 ins_pipe( pipe_slow ); 8163 %} 8164 8165 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8166 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8167 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8168 Matcher::vector_length_in_bytes(n) <= 32 && 8169 is_integral_type(Matcher::vector_element_basic_type(n))); 8170 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8171 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8172 ins_encode %{ 8173 int vlen_enc = vector_length_encoding(this); 8174 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8175 %} 8176 ins_pipe( pipe_slow ); 8177 %} 8178 8179 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 8180 predicate(UseAVX > 0 && !EnableX86ECoreOpts && 8181 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8182 Matcher::vector_length_in_bytes(n) <= 32 && 8183 !is_integral_type(Matcher::vector_element_basic_type(n))); 8184 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8185 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 8186 ins_encode %{ 8187 int vlen_enc = vector_length_encoding(this); 8188 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8189 %} 8190 ins_pipe( pipe_slow ); 8191 %} 8192 8193 instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{ 8194 predicate(UseAVX > 0 && EnableX86ECoreOpts && 8195 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 8196 Matcher::vector_length_in_bytes(n) <= 32); 8197 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8198 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %} 8199 effect(TEMP vtmp, TEMP dst); 8200 ins_encode %{ 8201 int vlen_enc = vector_length_encoding(this); 8202 __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc); 8203 __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8204 __ vpor ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8205 %} 8206 ins_pipe( pipe_slow ); 8207 %} 8208 8209 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 8210 predicate(Matcher::vector_length_in_bytes(n) == 64 && 8211 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 8212 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8213 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 8214 effect(TEMP ktmp); 8215 ins_encode %{ 8216 int vlen_enc = Assembler::AVX_512bit; 8217 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8218 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 8219 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8220 %} 8221 ins_pipe( pipe_slow ); 8222 %} 8223 8224 8225 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 8226 predicate(n->in(2)->bottom_type()->isa_vectmask() && 8227 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 8228 VM_Version::supports_avx512bw())); 8229 match(Set dst (VectorBlend (Binary src1 src2) mask)); 8230 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 8231 ins_encode %{ 8232 int vlen_enc = vector_length_encoding(this); 8233 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8234 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8235 %} 8236 ins_pipe( pipe_slow ); 8237 %} 8238 8239 // --------------------------------- ABS -------------------------------------- 8240 // a = |a| 8241 instruct vabsB_reg(vec dst, vec src) %{ 8242 match(Set dst (AbsVB src)); 8243 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 8244 ins_encode %{ 8245 uint vlen = Matcher::vector_length(this); 8246 if (vlen <= 16) { 8247 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8248 } else { 8249 int vlen_enc = vector_length_encoding(this); 8250 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8251 } 8252 %} 8253 ins_pipe( pipe_slow ); 8254 %} 8255 8256 instruct vabsS_reg(vec dst, vec src) %{ 8257 match(Set dst (AbsVS src)); 8258 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 8259 ins_encode %{ 8260 uint vlen = Matcher::vector_length(this); 8261 if (vlen <= 8) { 8262 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8263 } else { 8264 int vlen_enc = vector_length_encoding(this); 8265 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8266 } 8267 %} 8268 ins_pipe( pipe_slow ); 8269 %} 8270 8271 instruct vabsI_reg(vec dst, vec src) %{ 8272 match(Set dst (AbsVI src)); 8273 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8274 ins_encode %{ 8275 uint vlen = Matcher::vector_length(this); 8276 if (vlen <= 4) { 8277 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8278 } else { 8279 int vlen_enc = vector_length_encoding(this); 8280 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8281 } 8282 %} 8283 ins_pipe( pipe_slow ); 8284 %} 8285 8286 instruct vabsL_reg(vec dst, vec src) %{ 8287 match(Set dst (AbsVL src)); 8288 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8289 ins_encode %{ 8290 assert(UseAVX > 2, "required"); 8291 int vlen_enc = vector_length_encoding(this); 8292 if (!VM_Version::supports_avx512vl()) { 8293 vlen_enc = Assembler::AVX_512bit; 8294 } 8295 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8296 %} 8297 ins_pipe( pipe_slow ); 8298 %} 8299 8300 // --------------------------------- ABSNEG -------------------------------------- 8301 8302 instruct vabsnegF(vec dst, vec src) %{ 8303 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8304 match(Set dst (AbsVF src)); 8305 match(Set dst (NegVF src)); 8306 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8307 ins_cost(150); 8308 ins_encode %{ 8309 int opcode = 
this->ideal_Opcode(); 8310 int vlen = Matcher::vector_length(this); 8311 if (vlen == 2) { 8312 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8313 } else { 8314 assert(vlen == 8 || vlen == 16, "required"); 8315 int vlen_enc = vector_length_encoding(this); 8316 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8317 } 8318 %} 8319 ins_pipe( pipe_slow ); 8320 %} 8321 8322 instruct vabsneg4F(vec dst) %{ 8323 predicate(Matcher::vector_length(n) == 4); 8324 match(Set dst (AbsVF dst)); 8325 match(Set dst (NegVF dst)); 8326 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8327 ins_cost(150); 8328 ins_encode %{ 8329 int opcode = this->ideal_Opcode(); 8330 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8331 %} 8332 ins_pipe( pipe_slow ); 8333 %} 8334 8335 instruct vabsnegD(vec dst, vec src) %{ 8336 match(Set dst (AbsVD src)); 8337 match(Set dst (NegVD src)); 8338 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8339 ins_encode %{ 8340 int opcode = this->ideal_Opcode(); 8341 uint vlen = Matcher::vector_length(this); 8342 if (vlen == 2) { 8343 assert(UseSSE >= 2, "required"); 8344 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8345 } else { 8346 int vlen_enc = vector_length_encoding(this); 8347 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8348 } 8349 %} 8350 ins_pipe( pipe_slow ); 8351 %} 8352 8353 //------------------------------------- VectorTest -------------------------------------------- 8354 8355 #ifdef _LP64 8356 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8357 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8358 match(Set cr (VectorTest src1 src2)); 8359 effect(TEMP vtmp); 8360 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8361 ins_encode %{ 8362 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8363 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8364 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8365 %} 8366 ins_pipe( pipe_slow ); 8367 %} 8368 8369 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8370 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8371 match(Set cr (VectorTest src1 src2)); 8372 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8373 ins_encode %{ 8374 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8375 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8376 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8377 %} 8378 ins_pipe( pipe_slow ); 8379 %} 8380 8381 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8382 predicate((Matcher::vector_length(n->in(1)) < 8 || 8383 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8384 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8385 match(Set cr (VectorTest src1 src2)); 8386 effect(TEMP tmp); 8387 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8388 ins_encode %{ 8389 uint masklen = Matcher::vector_length(this, $src1); 8390 __ kmovwl($tmp$$Register, $src1$$KRegister); 8391 __ andl($tmp$$Register, (1 << masklen) - 1); 8392 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8393 %} 8394 ins_pipe( pipe_slow ); 8395 %} 8396 8397 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8398 predicate((Matcher::vector_length(n->in(1)) < 8 || 8399 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8400 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8401 match(Set cr (VectorTest src1 src2)); 8402 effect(TEMP tmp); 8403 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8404 ins_encode %{ 8405 uint masklen = Matcher::vector_length(this, $src1); 8406 __ kmovwl($tmp$$Register, $src1$$KRegister); 8407 __ andl($tmp$$Register, (1 << masklen) - 1); 8408 %} 8409 ins_pipe( pipe_slow ); 8410 %} 8411 8412 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8413 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8414 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8415 match(Set cr (VectorTest src1 src2)); 8416 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8417 ins_encode %{ 8418 uint masklen = Matcher::vector_length(this, $src1); 8419 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8420 %} 8421 ins_pipe( pipe_slow ); 8422 %} 8423 #endif 8424 8425 //------------------------------------- LoadMask -------------------------------------------- 8426 8427 instruct loadMask(legVec dst, legVec src) %{ 8428 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8429 match(Set dst (VectorLoadMask src)); 8430 effect(TEMP dst); 8431 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8432 ins_encode %{ 8433 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8434 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8435 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8436 %} 8437 ins_pipe( pipe_slow ); 8438 %} 8439 8440 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8441 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8442 match(Set dst (VectorLoadMask src)); 8443 effect(TEMP xtmp); 8444 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8445 ins_encode %{ 8446 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8447 true, Assembler::AVX_512bit); 8448 %} 8449 ins_pipe( pipe_slow ); 8450 %} 8451 8452 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8453 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8454 match(Set dst (VectorLoadMask src)); 8455 effect(TEMP xtmp); 8456 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8457 ins_encode %{ 8458 int vlen_enc = vector_length_encoding(in(1)); 8459 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8460 false, vlen_enc); 8461 %} 8462 ins_pipe( pipe_slow ); 8463 %} 8464 8465 //------------------------------------- StoreMask -------------------------------------------- 8466 8467 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8468 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8469 match(Set dst (VectorStoreMask src size)); 8470 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8471 ins_encode %{ 8472 int vlen = Matcher::vector_length(this); 8473 if (vlen <= 16 && UseAVX <= 2) { 8474 assert(UseSSE >= 3, "required"); 8475 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8476 } else { 8477 assert(UseAVX > 0, "required"); 8478 int src_vlen_enc = vector_length_encoding(this, $src); 8479 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8480 } 8481 %} 8482 ins_pipe( pipe_slow ); 8483 %} 8484 8485 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8486 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8487 match(Set dst (VectorStoreMask src size)); 8488 effect(TEMP_DEF dst, TEMP xtmp); 8489 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8490 ins_encode %{ 8491 int vlen_enc = Assembler::AVX_128bit; 8492 int vlen = Matcher::vector_length(this); 8493 if (vlen <= 8) { 8494 assert(UseSSE >= 3, "required"); 8495 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8496 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8497 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8498 } else { 8499 assert(UseAVX > 0, "required"); 8500 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8501 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8502 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8503 } 8504 %} 8505 ins_pipe( pipe_slow ); 8506 %} 8507 8508 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8509 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8510 match(Set dst (VectorStoreMask src size)); 8511 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8512 effect(TEMP_DEF dst, TEMP xtmp); 8513 ins_encode %{ 8514 int vlen_enc = Assembler::AVX_128bit; 8515 int vlen = Matcher::vector_length(this); 8516 if (vlen <= 4) { 8517 assert(UseSSE >= 3, "required"); 8518 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8519 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8520 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8521 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8522 } else { 8523 assert(UseAVX > 0, "required"); 8524 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8525 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8526 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8527 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8528 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8529 } 8530 %} 8531 ins_pipe( pipe_slow ); 8532 %} 8533 8534 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8535 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8536 match(Set dst (VectorStoreMask src size)); 8537 effect(TEMP_DEF dst, TEMP xtmp); 8538 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8539 ins_encode %{ 8540 assert(UseSSE >= 3, "required"); 8541 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8542 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8543 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8544 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8545 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8551 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8552 match(Set dst (VectorStoreMask src size)); 8553 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8554 effect(TEMP_DEF dst, TEMP vtmp); 8555 ins_encode %{ 8556 int vlen_enc = Assembler::AVX_128bit; 8557 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8558 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8559 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8560 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8561 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8562 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8563 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8569 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8570 match(Set dst (VectorStoreMask src size)); 8571 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8572 ins_encode %{ 8573 int src_vlen_enc = vector_length_encoding(this, $src); 8574 int dst_vlen_enc = vector_length_encoding(this); 8575 if (!VM_Version::supports_avx512vl()) { 8576 src_vlen_enc = Assembler::AVX_512bit; 8577 } 8578 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8579 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8580 %} 8581 ins_pipe( pipe_slow ); 8582 %} 8583 8584 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8585 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8586 match(Set dst (VectorStoreMask src size)); 8587 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8588 ins_encode %{ 8589 int src_vlen_enc = vector_length_encoding(this, $src); 8590 int dst_vlen_enc = vector_length_encoding(this); 8591 if (!VM_Version::supports_avx512vl()) { 8592 src_vlen_enc = Assembler::AVX_512bit; 8593 } 8594 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8595 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8596 %} 8597 ins_pipe( pipe_slow ); 8598 %} 8599 8600 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8601 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8602 match(Set dst (VectorStoreMask mask size)); 8603 effect(TEMP_DEF dst); 8604 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8605 ins_encode %{ 8606 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8607 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8608 false, Assembler::AVX_512bit, noreg); 8609 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8610 %} 8611 ins_pipe( pipe_slow ); 8612 %} 8613 8614 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8615 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8616 match(Set dst (VectorStoreMask mask size)); 8617 effect(TEMP_DEF dst); 8618 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8619 ins_encode %{ 8620 int dst_vlen_enc = vector_length_encoding(this); 8621 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8622 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8623 %} 8624 ins_pipe( pipe_slow ); 8625 %} 8626 8627 instruct vmaskcast_evex(kReg dst) %{ 8628 match(Set dst (VectorMaskCast dst)); 8629 ins_cost(0); 8630 format %{ "vector_mask_cast $dst" %} 8631 ins_encode %{ 8632 // empty 8633 %} 8634 ins_pipe(empty); 8635 %} 8636 8637 instruct vmaskcast(vec dst) %{ 8638 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8639 match(Set dst (VectorMaskCast dst)); 8640 ins_cost(0); 8641 format %{ "vector_mask_cast $dst" %} 8642 ins_encode %{ 8643 // empty 8644 %} 8645 ins_pipe(empty); 8646 %} 8647 8648 instruct vmaskcast_avx(vec dst, vec src) %{ 8649 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8650 match(Set dst (VectorMaskCast src)); 8651 format %{ "vector_mask_cast $dst, $src" %} 8652 ins_encode %{ 8653 int vlen = Matcher::vector_length(this); 8654 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8655 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8656 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8657 %} 8658 ins_pipe(pipe_slow); 8659 %} 8660 8661 //-------------------------------- Load Iota Indices ---------------------------------- 8662 8663 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8664 match(Set dst (VectorLoadConst src)); 8665 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8666 ins_encode %{ 8667 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8668 BasicType bt = Matcher::vector_element_basic_type(this); 8669 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8670 %} 8671 ins_pipe( pipe_slow ); 8672 %} 8673 8674 #ifdef _LP64 8675 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8676 match(Set dst (PopulateIndex src1 src2)); 8677 effect(TEMP dst, TEMP vtmp); 8678 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8679 ins_encode %{ 8680 assert($src2$$constant == 1, "required"); 8681 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8682 int vlen_enc = vector_length_encoding(this); 8683 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8684 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8685 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8686 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8687 %} 8688 ins_pipe( pipe_slow ); 8689 %} 8690 8691 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8692 match(Set dst (PopulateIndex src1 src2)); 8693 effect(TEMP dst, TEMP vtmp); 8694 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8695 ins_encode %{ 8696 assert($src2$$constant == 1, "required"); 8697 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8698 int vlen_enc = vector_length_encoding(this); 8699 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8700 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8701 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8702 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8703 %} 8704 ins_pipe( pipe_slow ); 8705 %} 8706 #endif 8707 //-------------------------------- Rearrange ---------------------------------- 8708 8709 // LoadShuffle/Rearrange for Byte 8710 8711 instruct loadShuffleB(vec dst) %{ 8712 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8713 match(Set dst (VectorLoadShuffle dst)); 8714 format %{ "vector_load_shuffle $dst, $dst" %} 8715 ins_encode %{ 8716 // empty 8717 %} 8718 ins_pipe( pipe_slow ); 8719 %} 8720 8721 instruct rearrangeB(vec dst, vec shuffle) %{ 8722 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8723 Matcher::vector_length(n) < 32); 8724 match(Set dst (VectorRearrange dst shuffle)); 8725 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8726 ins_encode %{ 8727 assert(UseSSE >= 4, "required"); 8728 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8729 %} 8730 ins_pipe( pipe_slow ); 8731 %} 8732 8733 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8734 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8735 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8736 match(Set dst (VectorRearrange src shuffle)); 8737 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8738 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister,
                       $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister,
                       $rtmp$$Register, $ktmp$$KRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    // Create a byte shuffle mask from the short shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
    // Each short index i is expanded to the byte index pair {2*i, 2*i+1},
    // e.g. a short index of 3 becomes the byte indices 6 and 7.
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    if (UseAVX == 0) {
      assert(vlen_in_bytes <= 16, "required");
      // Multiply each shuffle by two to get byte index
      __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister);
      __ psllw($vtmp$$XMMRegister, 1);

      // Duplicate to create 2 copies of byte index
      __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
      __ psllw($dst$$XMMRegister, 8);
      __ por($dst$$XMMRegister, $vtmp$$XMMRegister);

      // Add one to get alternate byte index
      __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg);
      __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
    } else {
      assert(UseAVX > 1 || vlen_in_bytes <= 16, "required");
      int vlen_enc = vector_length_encoding(this);
      // Multiply each shuffle by two to get byte index
      __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
      __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

      // Duplicate to create 2 copies of byte index
      __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc);
      __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

      // Add one to get alternate byte index
      __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS(vec dst, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_rearrange $dst, $shuffle, $src\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from the other 128-bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from its own 128-bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from the other lane in the shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleS_evex(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            VM_Version::supports_avx512bw());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Integer and Float

instruct loadShuffleI(vec dst, vec src, vec vtmp) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            Matcher::vector_length(n) == 4 && UseAVX == 0);
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    // Create a byte shuffle mask from the int shuffle mask;
    // only a byte shuffle instruction is available on these platforms.
    // Each int index i is expanded to the byte indices {4*i, 4*i+1, 4*i+2, 4*i+3},
    // e.g. an int index of 2 becomes the byte indices 8 through 11.

    // Duplicate and multiply each shuffle by 4
    __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister);
    __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0);
    __ psllw($vtmp$$XMMRegister, 2);

    // Duplicate again to create 4 copies of byte index
    __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ por($vtmp$$XMMRegister, $dst$$XMMRegister);

    // Add 3,2,1,0 to get alternate byte index
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg);
    __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI(vec dst, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX == 0);
  match(Set dst (VectorRearrange dst shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $dst" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleI_avx(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{
  predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) &&
            UseAVX > 0);
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Long and Double

instruct loadShuffleL(vec dst, vec src, vec vtmp) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_load_shuffle $dst, $src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    // Create a double word shuffle mask from the long shuffle mask;
    // only a double word shuffle instruction is available on these platforms.
    // Each long index i is expanded to the double word index pair {2*i, 2*i+1},
    // e.g. a long index of 1 becomes the double word indices 2 and 3.

    // Multiply each shuffle by two to get double word index
    __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc);

    // Duplicate each double word shuffle
    __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc);
    __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);

    // Add one to get alternate double word index
    __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct loadShuffleL_evex(vec dst, vec src) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorLoadShuffle src));
  format %{ "vector_load_shuffle $dst, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{
  predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE
            (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl()));
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this);
    if (vlen_enc == Assembler::AVX_128bit) {
      vlen_enc = Assembler::AVX_256bit;
    }
    __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c

instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vlen_enc = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not
enabled"); 9056 int vlen_enc = vector_length_encoding(this); 9057 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9058 %} 9059 ins_pipe( pipe_slow ); 9060 %} 9061 9062 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9063 match(Set c (FmaVD c (Binary a b))); 9064 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9065 ins_cost(150); 9066 ins_encode %{ 9067 assert(UseFMA, "not enabled"); 9068 int vlen_enc = vector_length_encoding(this); 9069 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9070 %} 9071 ins_pipe( pipe_slow ); 9072 %} 9073 9074 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9075 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9076 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9077 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9078 ins_cost(150); 9079 ins_encode %{ 9080 assert(UseFMA, "not enabled"); 9081 int vlen_enc = vector_length_encoding(this); 9082 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9083 %} 9084 ins_pipe( pipe_slow ); 9085 %} 9086 9087 // --------------------------------- Vector Multiply Add -------------------------------------- 9088 9089 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9090 predicate(UseAVX == 0); 9091 match(Set dst (MulAddVS2VI dst src1)); 9092 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9093 ins_encode %{ 9094 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9095 %} 9096 ins_pipe( pipe_slow ); 9097 %} 9098 9099 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9100 predicate(UseAVX > 0); 9101 match(Set dst (MulAddVS2VI src1 src2)); 9102 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9103 ins_encode %{ 9104 int vlen_enc = vector_length_encoding(this); 9105 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9106 %} 9107 ins_pipe( pipe_slow ); 9108 %} 9109 9110 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9111 9112 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9113 predicate(VM_Version::supports_avx512_vnni()); 9114 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9115 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 9116 ins_encode %{ 9117 assert(UseAVX > 2, "required"); 9118 int vlen_enc = vector_length_encoding(this); 9119 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9120 %} 9121 ins_pipe( pipe_slow ); 9122 ins_cost(10); 9123 %} 9124 9125 // --------------------------------- PopCount -------------------------------------- 9126 9127 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9128 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9129 match(Set dst (PopCountVI src)); 9130 match(Set dst (PopCountVL src)); 9131 format %{ "vector_popcount_integral $dst, $src" %} 9132 ins_encode %{ 9133 int opcode = this->ideal_Opcode(); 9134 int vlen_enc = vector_length_encoding(this, $src); 9135 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9136 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9137 %} 9138 ins_pipe( pipe_slow ); 9139 %} 9140 9141 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9142 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9143 match(Set dst (PopCountVI src mask)); 9144 match(Set dst (PopCountVL src mask)); 9145 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9146 ins_encode %{ 9147 int vlen_enc = vector_length_encoding(this, $src); 9148 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9149 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9150 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9151 %} 9152 ins_pipe( pipe_slow ); 9153 %} 9154 9155 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9156 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9157 match(Set dst (PopCountVI src)); 9158 match(Set dst (PopCountVL src)); 9159 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9160 format %{ "vector_popcount_integral $dst, $src\t! 
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9161 ins_encode %{ 9162 int opcode = this->ideal_Opcode(); 9163 int vlen_enc = vector_length_encoding(this, $src); 9164 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9165 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9166 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9167 %} 9168 ins_pipe( pipe_slow ); 9169 %} 9170 9171 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9172 9173 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9174 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9175 Matcher::vector_length_in_bytes(n->in(1)))); 9176 match(Set dst (CountTrailingZerosV src)); 9177 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9178 ins_cost(400); 9179 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9180 ins_encode %{ 9181 int vlen_enc = vector_length_encoding(this, $src); 9182 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9183 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9184 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9185 %} 9186 ins_pipe( pipe_slow ); 9187 %} 9188 9189 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9190 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9191 VM_Version::supports_avx512cd() && 9192 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9193 match(Set dst (CountTrailingZerosV src)); 9194 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9195 ins_cost(400); 9196 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9197 ins_encode %{ 9198 int vlen_enc = vector_length_encoding(this, $src); 9199 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9200 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9201 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9202 %} 9203 ins_pipe( pipe_slow ); 9204 %} 9205 9206 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9207 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9208 match(Set dst (CountTrailingZerosV src)); 9209 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9210 ins_cost(400); 9211 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9212 ins_encode %{ 9213 int vlen_enc = vector_length_encoding(this, $src); 9214 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9215 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9216 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9217 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9218 %} 9219 ins_pipe( pipe_slow ); 9220 %} 9221 9222 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9223 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9224 match(Set dst (CountTrailingZerosV src)); 9225 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
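  // Note: below AVX-512 there is no vector trailing-zero-count instruction, so the
  // vector_count_trailing_zeros_avx helper is expected to synthesize the result from
  // simpler operations (for example via the identity tzcnt(x) == popcount((x & -x) - 1)),
  // which is why three vector temporaries and a scratch GPR are reserved here.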
9226 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9227 ins_encode %{ 9228 int vlen_enc = vector_length_encoding(this, $src); 9229 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9230 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9231 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9232 %} 9233 ins_pipe( pipe_slow ); 9234 %} 9235 9236 9237 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9238 9239 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9240 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9241 effect(TEMP dst); 9242 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9243 ins_encode %{ 9244 int vector_len = vector_length_encoding(this); 9245 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9246 %} 9247 ins_pipe( pipe_slow ); 9248 %} 9249 9250 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9251 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9252 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9253 effect(TEMP dst); 9254 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9255 ins_encode %{ 9256 int vector_len = vector_length_encoding(this); 9257 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9258 %} 9259 ins_pipe( pipe_slow ); 9260 %} 9261 9262 // --------------------------------- Rotation Operations ---------------------------------- 9263 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9264 match(Set dst (RotateLeftV src shift)); 9265 match(Set dst (RotateRightV src shift)); 9266 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9267 ins_encode %{ 9268 int opcode = this->ideal_Opcode(); 9269 int vector_len = vector_length_encoding(this); 9270 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9271 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9272 %} 9273 ins_pipe( pipe_slow ); 9274 %} 9275 9276 instruct vprorate(vec dst, vec src, vec shift) %{ 9277 match(Set dst (RotateLeftV src shift)); 9278 match(Set dst (RotateRightV src shift)); 9279 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9280 ins_encode %{ 9281 int opcode = this->ideal_Opcode(); 9282 int vector_len = vector_length_encoding(this); 9283 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9284 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9285 %} 9286 ins_pipe( pipe_slow ); 9287 %} 9288 9289 // ---------------------------------- Masked Operations ------------------------------------ 9290 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9291 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9292 match(Set dst (LoadVectorMasked mem mask)); 9293 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9294 ins_encode %{ 9295 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9296 int vlen_enc = vector_length_encoding(this); 9297 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9298 %} 9299 ins_pipe( pipe_slow ); 9300 %} 9301 9302 9303 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9304 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9305 match(Set dst (LoadVectorMasked mem mask)); 9306 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9307 ins_encode %{ 9308 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9309 int vector_len = vector_length_encoding(this); 9310 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9311 %} 9312 ins_pipe( pipe_slow ); 9313 %} 9314 9315 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9316 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9317 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9318 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9319 ins_encode %{ 9320 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9321 int vlen_enc = vector_length_encoding(src_node); 9322 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9323 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9324 %} 9325 ins_pipe( pipe_slow ); 9326 %} 9327 9328 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9329 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9330 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9331 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9332 ins_encode %{ 9333 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9334 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9335 int vlen_enc = vector_length_encoding(src_node); 9336 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9337 %} 9338 ins_pipe( pipe_slow ); 9339 %} 9340 9341 #ifdef _LP64 9342 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9343 match(Set addr (VerifyVectorAlignment addr mask)); 9344 effect(KILL cr); 9345 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9346 ins_encode %{ 9347 Label Lskip; 9348 // check if masked bits of addr are zero 9349 __ testq($addr$$Register, $mask$$constant); 9350 __ jccb(Assembler::equal, Lskip); 9351 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9352 __ bind(Lskip); 9353 %} 9354 ins_pipe(pipe_slow); 9355 %} 9356 9357 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9358 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9359 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9360 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9361 ins_encode %{ 9362 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9363 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9364 9365 Label DONE; 9366 int vlen_enc = vector_length_encoding(this, $src1); 9367 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9368 9369 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9370 __ mov64($dst$$Register, -1L); 9371 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9372 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9373 __ jccb(Assembler::carrySet, DONE); 9374 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9375 __ notq($dst$$Register); 9376 __ tzcntq($dst$$Register, $dst$$Register); 9377 __ bind(DONE); 9378 %} 9379 ins_pipe( pipe_slow ); 9380 %} 9381 9382 9383 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9384 match(Set dst (VectorMaskGen len)); 9385 effect(TEMP temp, KILL cr); 9386 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9387 ins_encode %{ 9388 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9389 %} 9390 ins_pipe( pipe_slow ); 9391 %} 9392 9393 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9394 match(Set dst (VectorMaskGen len)); 9395 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9396 effect(TEMP temp); 9397 ins_encode %{ 9398 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9399 __ kmovql($dst$$KRegister, $temp$$Register); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9405 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9406 match(Set dst (VectorMaskToLong mask)); 9407 effect(TEMP dst, KILL cr); 9408 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9409 ins_encode %{ 9410 int opcode = this->ideal_Opcode(); 9411 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9412 int mask_len = Matcher::vector_length(this, $mask); 9413 int mask_size = mask_len * type2aelembytes(mbt); 9414 int vlen_enc = vector_length_encoding(this, $mask); 9415 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9416 $dst$$Register, mask_len, mask_size, vlen_enc); 9417 %} 9418 ins_pipe( pipe_slow ); 9419 %} 9420 9421 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9422 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9423 match(Set dst (VectorMaskToLong mask)); 9424 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9425 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9426 ins_encode %{ 9427 int opcode = this->ideal_Opcode(); 9428 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9429 int mask_len = Matcher::vector_length(this, $mask); 9430 int vlen_enc = vector_length_encoding(this, $mask); 9431 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9432 $dst$$Register, mask_len, mbt, vlen_enc); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9438 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9439 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9440 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9441 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9442 ins_encode %{ 9443 int opcode = this->ideal_Opcode(); 9444 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9445 int mask_len = Matcher::vector_length(this, $mask); 9446 int vlen_enc = vector_length_encoding(this, $mask); 9447 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9448 $dst$$Register, mask_len, mbt, vlen_enc); 9449 %} 9450 ins_pipe( pipe_slow ); 9451 %} 9452 9453 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9454 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9455 match(Set dst (VectorMaskTrueCount mask)); 9456 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9457 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9458 ins_encode %{ 9459 int opcode = this->ideal_Opcode(); 9460 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9461 int mask_len = Matcher::vector_length(this, $mask); 9462 int mask_size = mask_len * type2aelembytes(mbt); 9463 int vlen_enc = vector_length_encoding(this, $mask); 9464 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9465 $tmp$$Register, mask_len, mask_size, vlen_enc); 9466 %} 9467 ins_pipe( pipe_slow ); 9468 %} 9469 9470 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9471 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9472 match(Set dst (VectorMaskTrueCount mask)); 9473 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9474 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9475 ins_encode %{ 9476 int opcode = this->ideal_Opcode(); 9477 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9478 int mask_len = Matcher::vector_length(this, $mask); 9479 int vlen_enc = vector_length_encoding(this, $mask); 9480 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9481 $tmp$$Register, mask_len, mbt, vlen_enc); 9482 %} 9483 ins_pipe( pipe_slow ); 9484 %} 9485 9486 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9487 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9488 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9489 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9490 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9491 ins_encode %{ 9492 int opcode = this->ideal_Opcode(); 9493 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9494 int mask_len = Matcher::vector_length(this, $mask); 9495 int vlen_enc = vector_length_encoding(this, $mask); 9496 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9497 $tmp$$Register, mask_len, mbt, vlen_enc); 9498 %} 9499 ins_pipe( pipe_slow ); 9500 %} 9501 9502 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9503 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9504 match(Set dst (VectorMaskFirstTrue mask)); 9505 match(Set dst (VectorMaskLastTrue mask)); 9506 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9507 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9508 ins_encode %{ 9509 int opcode = this->ideal_Opcode(); 9510 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9511 int mask_len = Matcher::vector_length(this, $mask); 9512 int mask_size = mask_len * type2aelembytes(mbt); 9513 int vlen_enc = vector_length_encoding(this, $mask); 9514 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9515 $tmp$$Register, mask_len, mask_size, vlen_enc); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9521 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9522 match(Set dst (VectorMaskFirstTrue mask)); 9523 match(Set dst (VectorMaskLastTrue mask)); 9524 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9525 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9526 ins_encode %{ 9527 int opcode = this->ideal_Opcode(); 9528 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9529 int mask_len = Matcher::vector_length(this, $mask); 9530 int vlen_enc = vector_length_encoding(this, $mask); 9531 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9532 $tmp$$Register, mask_len, mbt, vlen_enc); 9533 %} 9534 ins_pipe( pipe_slow ); 9535 %} 9536 9537 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9538 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9539 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9540 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9541 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9542 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9543 ins_encode %{ 9544 int opcode = this->ideal_Opcode(); 9545 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9546 int mask_len = Matcher::vector_length(this, $mask); 9547 int vlen_enc = vector_length_encoding(this, $mask); 9548 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9549 $tmp$$Register, mask_len, mbt, vlen_enc); 9550 %} 9551 ins_pipe( pipe_slow ); 9552 %} 9553 9554 // --------------------------------- Compress/Expand Operations --------------------------- 9555 #ifdef _LP64 9556 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9557 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9558 match(Set dst (CompressV src mask)); 9559 match(Set dst (ExpandV src mask)); 9560 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9561 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9562 ins_encode %{ 9563 int opcode = this->ideal_Opcode(); 9564 int vlen_enc = vector_length_encoding(this); 9565 BasicType bt = Matcher::vector_element_basic_type(this); 9566 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9567 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9568 %} 9569 ins_pipe( pipe_slow ); 9570 %} 9571 #endif 9572 9573 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9574 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9575 match(Set dst (CompressV src mask)); 9576 match(Set dst (ExpandV src mask)); 9577 format %{ "vector_compress_expand $dst, $src, $mask" %} 9578 ins_encode %{ 9579 int opcode = this->ideal_Opcode(); 9580 int vector_len = vector_length_encoding(this); 9581 BasicType bt = Matcher::vector_element_basic_type(this); 9582 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9583 %} 9584 ins_pipe( pipe_slow ); 9585 %} 9586 9587 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9588 match(Set dst (CompressM mask)); 9589 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9590 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9591 ins_encode %{ 9592 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9593 int mask_len = Matcher::vector_length(this); 9594 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9595 %} 9596 ins_pipe( pipe_slow ); 9597 %} 9598 9599 #endif // _LP64 9600 9601 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9602 9603 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9604 predicate(!VM_Version::supports_gfni()); 9605 match(Set dst (ReverseV src)); 9606 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9607 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9608 ins_encode %{ 9609 int vec_enc = vector_length_encoding(this); 9610 BasicType bt = Matcher::vector_element_basic_type(this); 9611 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9612 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9613 %} 9614 ins_pipe( pipe_slow ); 9615 %} 9616 9617 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9618 predicate(VM_Version::supports_gfni()); 9619 match(Set dst (ReverseV src)); 9620 effect(TEMP dst, TEMP xtmp); 9621 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9622 ins_encode %{ 9623 int vec_enc = vector_length_encoding(this); 9624 BasicType bt = Matcher::vector_element_basic_type(this); 9625 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9626 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9627 $xtmp$$XMMRegister); 9628 %} 9629 ins_pipe( pipe_slow ); 9630 %} 9631 9632 instruct vreverse_byte_reg(vec dst, vec src) %{ 9633 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9634 match(Set dst (ReverseBytesV src)); 9635 effect(TEMP dst); 9636 format %{ "vector_reverse_byte $dst, $src" %} 9637 ins_encode %{ 9638 int vec_enc = vector_length_encoding(this); 9639 BasicType bt = Matcher::vector_element_basic_type(this); 9640 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9641 %} 9642 ins_pipe( pipe_slow ); 9643 %} 9644 9645 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9646 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9647 match(Set dst (ReverseBytesV src)); 9648 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9649 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9650 ins_encode %{ 9651 int vec_enc = vector_length_encoding(this); 9652 BasicType bt = Matcher::vector_element_basic_type(this); 9653 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9654 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9655 %} 9656 ins_pipe( pipe_slow ); 9657 %} 9658 9659 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9660 9661 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9662 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9663 Matcher::vector_length_in_bytes(n->in(1)))); 9664 match(Set dst (CountLeadingZerosV src)); 9665 format %{ "vector_count_leading_zeros $dst, $src" %} 9666 ins_encode %{ 9667 int vlen_enc = vector_length_encoding(this, $src); 9668 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9669 __ 
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9670 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9671 %} 9672 ins_pipe( pipe_slow ); 9673 %} 9674 9675 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9676 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9677 Matcher::vector_length_in_bytes(n->in(1)))); 9678 match(Set dst (CountLeadingZerosV src mask)); 9679 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9680 ins_encode %{ 9681 int vlen_enc = vector_length_encoding(this, $src); 9682 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9683 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9684 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9685 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9686 %} 9687 ins_pipe( pipe_slow ); 9688 %} 9689 9690 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9691 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9692 VM_Version::supports_avx512cd() && 9693 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9694 match(Set dst (CountLeadingZerosV src)); 9695 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9696 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9697 ins_encode %{ 9698 int vlen_enc = vector_length_encoding(this, $src); 9699 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9700 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9701 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9702 %} 9703 ins_pipe( pipe_slow ); 9704 %} 9705 9706 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9707 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9708 match(Set dst (CountLeadingZerosV src)); 9709 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9710 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9711 ins_encode %{ 9712 int vlen_enc = vector_length_encoding(this, $src); 9713 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9714 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9715 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9716 $rtmp$$Register, true, vlen_enc); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9722 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9723 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9724 match(Set dst (CountLeadingZerosV src)); 9725 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9726 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9727 ins_encode %{ 9728 int vlen_enc = vector_length_encoding(this, $src); 9729 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9730 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9731 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9732 %} 9733 ins_pipe( pipe_slow ); 9734 %} 9735 9736 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9737 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9738 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9739 match(Set dst (CountLeadingZerosV src)); 9740 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9741 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9742 ins_encode %{ 9743 int vlen_enc = vector_length_encoding(this, $src); 9744 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9745 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9746 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9747 %} 9748 ins_pipe( pipe_slow ); 9749 %} 9750 9751 // ---------------------------------- Vector Masked Operations ------------------------------------ 9752 9753 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9754 match(Set dst (AddVB (Binary dst src2) mask)); 9755 match(Set dst (AddVS (Binary dst src2) mask)); 9756 match(Set dst (AddVI (Binary dst src2) mask)); 9757 match(Set dst (AddVL (Binary dst src2) mask)); 9758 match(Set dst (AddVF (Binary dst src2) mask)); 9759 match(Set dst (AddVD (Binary dst src2) mask)); 9760 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9761 ins_encode %{ 9762 int vlen_enc = vector_length_encoding(this); 9763 BasicType bt = Matcher::vector_element_basic_type(this); 9764 int opc = this->ideal_Opcode(); 9765 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9766 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9767 %} 9768 ins_pipe( pipe_slow ); 9769 %} 9770 9771 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9772 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9773 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9774 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9775 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9776 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9777 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9778 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9779 ins_encode %{ 9780 int vlen_enc = vector_length_encoding(this); 9781 BasicType bt = Matcher::vector_element_basic_type(this); 9782 int opc = this->ideal_Opcode(); 9783 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9784 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9785 %} 9786 ins_pipe( pipe_slow ); 9787 %} 9788 9789 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9790 match(Set dst (XorV (Binary dst src2) mask)); 9791 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

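// Unary masked operations (sqrt below, abs further down) have no separate
// second input, so the destination register is passed to evmasked_op() in
// both source positions.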
instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

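// Masked shifts come in three forms: an immediate count, a uniform count held
// in a vector register, and a variable per-lane count. The last two are told
// apart by the node's is_var_shift() property, which is forwarded to
// evmasked_op() as the trailing boolean so the right encoding is chosen.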
instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

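// Masked vector compare: the boolean test constant is translated into an EVEX
// comparison predicate and dispatched on the element type (evpcmpb/w/d/q for
// integral lanes, evcmpps/evcmppd for floating-point lanes). Only lanes whose
// bit is set in $mask participate in the comparison; the result is written to
// the $dst opmask register.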
instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is dispatched on the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

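// IsInfinite is implemented with VFPCLASS. The immediate 0x18 selects the
// +infinity (bit 3) and -infinity (bit 4) classes, so the resulting mask bit
// is set exactly when the input is +/-infinity; it is then copied to a GPR.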
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct vector_selectfrom_twovectors_reg_evex(vec index, vec src1, vec src2)
%{
  match(Set index (SelectFromTwoVector (Binary index src1) src2));
  format %{ "select_from_two_vector $index, $src1, $src2 \t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ select_from_two_vectors_evex(bt, $index$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}