//
// Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, i.e. 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
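// How to read the definitions that follow (an illustrative note, not
// additional register definitions): each 512-bit register is described to
// the allocator as sixteen 32-bit slots.  For example,
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//   reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
//
// declare the first two slots of xmm0: a Float lives in XMM0 alone, a
// Double in the pair (XMM0, XMM0b), and a full 512-bit vector spans
// XMM0..XMM0p.  The (SOC, SOC) save types mark every slot save-on-call,
// i.e. assumed killed across call sites under both calling conventions.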
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
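//
// Roughly speaking, ADLC copies source_hpp %{ }% blocks into the generated
// architecture header and source %{ }% blocks into the generated .cpp file,
// so declarations made here are visible to code that includes the generated
// header.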

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // The exception handler starts out as a jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case 4: // fall-through
    case 8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    Flag_sets_carry_flag = Node::_last_flag << 2,
    Flag_sets_parity_flag = Node::_last_flag << 3,
    Flag_sets_zero_flag = Node::_last_flag << 4,
    Flag_sets_overflow_flag = Node::_last_flag << 5,
    Flag_sets_sign_flag = Node::_last_flag << 6,
    Flag_clears_carry_flag = Node::_last_flag << 7,
    Flag_clears_parity_flag = Node::_last_flag << 8,
Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 C2_MacroAssembler _masm(&cbuf); 1314 address base = __ start_a_stub(size_exception_handler()); 1315 if (base == nullptr) { 1316 ciEnv::current()->record_failure("CodeCache is full"); 1317 return 0; // CodeBuffer::expand failed 1318 } 1319 int offset = __ offset(); 1320 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1321 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1322 __ end_a_stub(); 1323 return offset; 1324 } 1325 1326 // Emit deopt handler code. 1327 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1328 1329 // Note that the code buffer's insts_mark is always relative to insts. 1330 // That's why we must use the macroassembler to generate a handler. 1331 C2_MacroAssembler _masm(&cbuf); 1332 address base = __ start_a_stub(size_deopt_handler()); 1333 if (base == nullptr) { 1334 ciEnv::current()->record_failure("CodeCache is full"); 1335 return 0; // CodeBuffer::expand failed 1336 } 1337 int offset = __ offset(); 1338 1339 #ifdef _LP64 1340 address the_pc = (address) __ pc(); 1341 Label next; 1342 // push a "the_pc" on the stack without destroying any registers 1343 // as they all may be live. 
1344 1345 // push address of "next" 1346 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1347 __ bind(next); 1348 // adjust it so it matches "the_pc" 1349 __ subptr(Address(rsp, 0), __ offset() - offset); 1350 #else 1351 InternalAddress here(__ pc()); 1352 __ pushptr(here.addr(), noreg); 1353 #endif 1354 1355 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1356 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1357 __ end_a_stub(); 1358 return offset; 1359 } 1360 1361 Assembler::Width widthForType(BasicType bt) { 1362 if (bt == T_BYTE) { 1363 return Assembler::B; 1364 } else if (bt == T_SHORT) { 1365 return Assembler::W; 1366 } else if (bt == T_INT) { 1367 return Assembler::D; 1368 } else { 1369 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1370 return Assembler::Q; 1371 } 1372 } 1373 1374 //============================================================================= 1375 1376 // Float masks come from different places depending on platform. 1377 #ifdef _LP64 1378 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1379 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1380 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1381 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1382 #else 1383 static address float_signmask() { return (address)float_signmask_pool; } 1384 static address float_signflip() { return (address)float_signflip_pool; } 1385 static address double_signmask() { return (address)double_signmask_pool; } 1386 static address double_signflip() { return (address)double_signflip_pool; } 1387 #endif 1388 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1389 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1390 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1391 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1392 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1393 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1394 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1395 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1396 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1397 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1398 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1399 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1400 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1401 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1402 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1403 1404 //============================================================================= 1405 bool Matcher::match_rule_supported(int opcode) { 1406 if (!has_match_rule(opcode)) { 1407 return false; // no match rule present 1408 } 1409 const bool is_LP64 = LP64_ONLY(true) 
NOT_LP64(false); 1410 switch (opcode) { 1411 case Op_AbsVL: 1412 case Op_StoreVectorScatter: 1413 if (UseAVX < 3) { 1414 return false; 1415 } 1416 break; 1417 case Op_PopCountI: 1418 case Op_PopCountL: 1419 if (!UsePopCountInstruction) { 1420 return false; 1421 } 1422 break; 1423 case Op_PopCountVI: 1424 if (UseAVX < 2) { 1425 return false; 1426 } 1427 break; 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 if (!VM_Version::supports_cx8()) { 1514 return false; 1515 } 1516 break; 1517 case Op_StrIndexOf: 1518 if (!UseSSE42Intrinsics) { 1519 return false; 1520 } 1521 break; 1522 case Op_StrIndexOfChar: 1523 if (!UseSSE42Intrinsics) { 1524 return false; 1525 } 1526 break; 1527 case Op_OnSpinWait: 1528 if (VM_Version::supports_on_spin_wait() == false) { 1529 return false; 1530 } 1531 break; 1532 case Op_MulVB: 1533 case Op_LShiftVB: 1534 case Op_RShiftVB: 1535 case Op_URShiftVB: 1536 case Op_VectorInsert: 1537 case Op_VectorLoadMask: 1538 case Op_VectorStoreMask: 1539 case Op_VectorBlend: 1540 if (UseSSE < 4) { 1541 return false; 1542 } 1543 break; 1544 #ifdef _LP64 1545 case Op_MaxD: 1546 case Op_MaxF: 1547 case Op_MinD: 1548 case Op_MinF: 1549 if (UseAVX < 1) { // enabled for AVX only 1550 return false; 1551 } 1552 break; 1553 #endif 1554 case Op_CacheWB: 1555 case Op_CacheWBPreSync: 1556 case Op_CacheWBPostSync: 1557 if (!VM_Version::supports_data_cache_line_flush()) { 1558 return false; 1559 } 1560 break; 1561 case Op_ExtractB: 1562 case Op_ExtractL: 1563 case Op_ExtractI: 1564 case Op_RoundDoubleMode: 1565 if (UseSSE < 4) { 1566 return false; 1567 } 1568 break; 1569 case Op_RoundDoubleModeV: 
1570 if (VM_Version::supports_avx() == false) { 1571 return false; // 128bit vroundpd is not available 1572 } 1573 break; 1574 case Op_LoadVectorGather: 1575 if (UseAVX < 2) { 1576 return false; 1577 } 1578 break; 1579 case Op_FmaF: 1580 case Op_FmaD: 1581 case Op_FmaVD: 1582 case Op_FmaVF: 1583 if (!UseFMA) { 1584 return false; 1585 } 1586 break; 1587 case Op_MacroLogicV: 1588 if (UseAVX < 3 || !UseVectorMacroLogic) { 1589 return false; 1590 } 1591 break; 1592 1593 case Op_VectorCmpMasked: 1594 case Op_VectorMaskGen: 1595 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1596 return false; 1597 } 1598 break; 1599 case Op_VectorMaskFirstTrue: 1600 case Op_VectorMaskLastTrue: 1601 case Op_VectorMaskTrueCount: 1602 case Op_VectorMaskToLong: 1603 if (!is_LP64 || UseAVX < 1) { 1604 return false; 1605 } 1606 break; 1607 case Op_RoundF: 1608 case Op_RoundD: 1609 if (!is_LP64) { 1610 return false; 1611 } 1612 break; 1613 case Op_CopySignD: 1614 case Op_CopySignF: 1615 if (UseAVX < 3 || !is_LP64) { 1616 return false; 1617 } 1618 if (!VM_Version::supports_avx512vl()) { 1619 return false; 1620 } 1621 break; 1622 #ifndef _LP64 1623 case Op_AddReductionVF: 1624 case Op_AddReductionVD: 1625 case Op_MulReductionVF: 1626 case Op_MulReductionVD: 1627 if (UseSSE < 1) { // requires at least SSE 1628 return false; 1629 } 1630 break; 1631 case Op_MulAddVS2VI: 1632 case Op_RShiftVL: 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if (UseSSE < 2) { 1636 return false; 1637 } 1638 break; 1639 #endif // !LP64 1640 case Op_CompressBits: 1641 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1642 return false; 1643 } 1644 break; 1645 case Op_ExpandBits: 1646 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1647 return false; 1648 } 1649 break; 1650 case Op_SignumF: 1651 if (UseSSE < 1) { 1652 return false; 1653 } 1654 break; 1655 case Op_SignumD: 1656 if (UseSSE < 2) { 1657 return false; 1658 } 1659 break; 1660 case Op_CompressM: 1661 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1662 return false; 1663 } 1664 break; 1665 case Op_CompressV: 1666 case Op_ExpandV: 1667 if (!VM_Version::supports_avx512vl()) { 1668 return false; 1669 } 1670 break; 1671 case Op_SqrtF: 1672 if (UseSSE < 1) { 1673 return false; 1674 } 1675 break; 1676 case Op_SqrtD: 1677 #ifdef _LP64 1678 if (UseSSE < 2) { 1679 return false; 1680 } 1681 #else 1682 // x86_32.ad has a special match rule for SqrtD. 1683 // Together with common x86 rules, this handles all UseSSE cases. 1684 #endif 1685 break; 1686 case Op_ConvF2HF: 1687 case Op_ConvHF2F: 1688 if (!VM_Version::supports_float16()) { 1689 return false; 1690 } 1691 break; 1692 case Op_VectorCastF2HF: 1693 case Op_VectorCastHF2F: 1694 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1695 return false; 1696 } 1697 break; 1698 } 1699 return true; // Match rules are supported by default. 
1700 } 1701 1702 //------------------------------------------------------------------------ 1703 1704 static inline bool is_pop_count_instr_target(BasicType bt) { 1705 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1706 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1707 } 1708 1709 bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { 1710 return match_rule_supported_vector(opcode, vlen, bt); 1711 } 1712 1713 // Identify extra cases that we might want to provide match rules for vector nodes and 1714 // other intrinsics guarded with vector length (vlen) and element type (bt). 1715 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1716 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1717 if (!match_rule_supported(opcode)) { 1718 return false; 1719 } 1720 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1721 // * SSE2 supports 128bit vectors for all types; 1722 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1723 // * AVX2 supports 256bit vectors for all types; 1724 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1725 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1726 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1727 // And MaxVectorSize is taken into account as well. 1728 if (!vector_size_supported(bt, vlen)) { 1729 return false; 1730 } 1731 // Special cases which require vector length follow: 1732 // * implementation limitations 1733 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1734 // * 128bit vroundpd instruction is present only in AVX1 1735 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1736 switch (opcode) { 1737 case Op_AbsVF: 1738 case Op_NegVF: 1739 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1740 return false; // 512bit vandps and vxorps are not available 1741 } 1742 break; 1743 case Op_AbsVD: 1744 case Op_NegVD: 1745 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1746 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1747 } 1748 break; 1749 case Op_RotateRightV: 1750 case Op_RotateLeftV: 1751 if (bt != T_INT && bt != T_LONG) { 1752 return false; 1753 } // fallthrough 1754 case Op_MacroLogicV: 1755 if (!VM_Version::supports_evex() || 1756 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1757 return false; 1758 } 1759 break; 1760 case Op_ClearArray: 1761 case Op_VectorMaskGen: 1762 case Op_VectorCmpMasked: 1763 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1764 return false; 1765 } 1766 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1767 return false; 1768 } 1769 break; 1770 case Op_LoadVectorMasked: 1771 case Op_StoreVectorMasked: 1772 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1773 return false; 1774 } 1775 break; 1776 case Op_MaxV: 1777 case Op_MinV: 1778 if (UseSSE < 4 && is_integral_type(bt)) { 1779 return false; 1780 } 1781 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1782 // Float/Double intrinsics are enabled for AVX family currently. 
1783 if (UseAVX == 0) { 1784 return false; 1785 } 1786 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1787 return false; 1788 } 1789 } 1790 break; 1791 case Op_CallLeafVector: 1792 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1793 return false; 1794 } 1795 break; 1796 case Op_AddReductionVI: 1797 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1798 return false; 1799 } 1800 // fallthrough 1801 case Op_AndReductionV: 1802 case Op_OrReductionV: 1803 case Op_XorReductionV: 1804 if (is_subword_type(bt) && (UseSSE < 4)) { 1805 return false; 1806 } 1807 #ifndef _LP64 1808 if (bt == T_BYTE || bt == T_LONG) { 1809 return false; 1810 } 1811 #endif 1812 break; 1813 #ifndef _LP64 1814 case Op_VectorInsert: 1815 if (bt == T_LONG || bt == T_DOUBLE) { 1816 return false; 1817 } 1818 break; 1819 #endif 1820 case Op_MinReductionV: 1821 case Op_MaxReductionV: 1822 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1823 return false; 1824 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1825 return false; 1826 } 1827 // Float/Double intrinsics enabled for AVX family. 1828 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1829 return false; 1830 } 1831 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1832 return false; 1833 } 1834 #ifndef _LP64 1835 if (bt == T_BYTE || bt == T_LONG) { 1836 return false; 1837 } 1838 #endif 1839 break; 1840 case Op_VectorTest: 1841 if (UseSSE < 4) { 1842 return false; // Implementation limitation 1843 } else if (size_in_bits < 32) { 1844 return false; // Implementation limitation 1845 } 1846 break; 1847 case Op_VectorLoadShuffle: 1848 case Op_VectorRearrange: 1849 if(vlen == 2) { 1850 return false; // Implementation limitation due to how shuffle is loaded 1851 } else if (size_in_bits == 256 && UseAVX < 2) { 1852 return false; // Implementation limitation 1853 } 1854 break; 1855 case Op_VectorLoadMask: 1856 case Op_VectorMaskCast: 1857 if (size_in_bits == 256 && UseAVX < 2) { 1858 return false; // Implementation limitation 1859 } 1860 // fallthrough 1861 case Op_VectorStoreMask: 1862 if (vlen == 2) { 1863 return false; // Implementation limitation 1864 } 1865 break; 1866 case Op_PopulateIndex: 1867 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1868 return false; 1869 } 1870 break; 1871 case Op_VectorCastB2X: 1872 case Op_VectorCastS2X: 1873 case Op_VectorCastI2X: 1874 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1875 return false; 1876 } 1877 break; 1878 case Op_VectorCastL2X: 1879 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1880 return false; 1881 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1882 return false; 1883 } 1884 break; 1885 case Op_VectorCastF2X: { 1886 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1887 // happen after intermediate conversion to integer and special handling 1888 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
1889 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1890 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1891 return false; 1892 } 1893 } 1894 // fallthrough 1895 case Op_VectorCastD2X: 1896 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1897 return false; 1898 } 1899 break; 1900 case Op_VectorCastF2HF: 1901 case Op_VectorCastHF2F: 1902 if (!VM_Version::supports_f16c() && 1903 ((!VM_Version::supports_evex() || 1904 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1905 return false; 1906 } 1907 break; 1908 case Op_RoundVD: 1909 if (!VM_Version::supports_avx512dq()) { 1910 return false; 1911 } 1912 break; 1913 case Op_MulReductionVI: 1914 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1915 return false; 1916 } 1917 break; 1918 case Op_LoadVectorGatherMasked: 1919 case Op_StoreVectorScatterMasked: 1920 case Op_StoreVectorScatter: 1921 if (is_subword_type(bt)) { 1922 return false; 1923 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1924 return false; 1925 } 1926 // fallthrough 1927 case Op_LoadVectorGather: 1928 if (size_in_bits == 64 ) { 1929 return false; 1930 } 1931 break; 1932 case Op_MaskAll: 1933 if (!VM_Version::supports_evex()) { 1934 return false; 1935 } 1936 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1937 return false; 1938 } 1939 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1940 return false; 1941 } 1942 break; 1943 case Op_VectorMaskCmp: 1944 if (vlen < 2 || size_in_bits < 32) { 1945 return false; 1946 } 1947 break; 1948 case Op_CompressM: 1949 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1950 return false; 1951 } 1952 break; 1953 case Op_CompressV: 1954 case Op_ExpandV: 1955 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1956 return false; 1957 } 1958 if (size_in_bits < 128 ) { 1959 return false; 1960 } 1961 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1962 return false; 1963 } 1964 break; 1965 case Op_VectorLongToMask: 1966 if (UseAVX < 1 || !is_LP64) { 1967 return false; 1968 } 1969 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1970 return false; 1971 } 1972 break; 1973 case Op_SignumVD: 1974 case Op_SignumVF: 1975 if (UseAVX < 1) { 1976 return false; 1977 } 1978 break; 1979 case Op_PopCountVI: 1980 case Op_PopCountVL: { 1981 if (!is_pop_count_instr_target(bt) && 1982 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1983 return false; 1984 } 1985 } 1986 break; 1987 case Op_ReverseV: 1988 case Op_ReverseBytesV: 1989 if (UseAVX < 2) { 1990 return false; 1991 } 1992 break; 1993 case Op_CountTrailingZerosV: 1994 case Op_CountLeadingZerosV: 1995 if (UseAVX < 2) { 1996 return false; 1997 } 1998 break; 1999 } 2000 return true; // Per default match rules are supported. 2001 } 2002 2003 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2004 // ADLC based match_rule_supported routine checks for the existence of pattern based 2005 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2006 // of their non-masked counterpart with mask edge being the differentiator. 2007 // This routine does a strict check on the existence of masked operation patterns 2008 // by returning a default false value for all the other opcodes apart from the 2009 // ones whose masked instruction patterns are defined in this file. 
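  // The masked patterns use the AVX-512 opmask (k) registers, and for vector
  // lengths below 512 bits they additionally require AVX512VL; the
  // size_in_bits check below enforces this before the per-opcode cases.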
2010 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2011 return false; 2012 } 2013 2014 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2015 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2016 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2017 return false; 2018 } 2019 switch(opcode) { 2020 // Unary masked operations 2021 case Op_AbsVB: 2022 case Op_AbsVS: 2023 if(!VM_Version::supports_avx512bw()) { 2024 return false; // Implementation limitation 2025 } 2026 case Op_AbsVI: 2027 case Op_AbsVL: 2028 return true; 2029 2030 // Ternary masked operations 2031 case Op_FmaVF: 2032 case Op_FmaVD: 2033 return true; 2034 2035 case Op_MacroLogicV: 2036 if(bt != T_INT && bt != T_LONG) { 2037 return false; 2038 } 2039 return true; 2040 2041 // Binary masked operations 2042 case Op_AddVB: 2043 case Op_AddVS: 2044 case Op_SubVB: 2045 case Op_SubVS: 2046 case Op_MulVS: 2047 case Op_LShiftVS: 2048 case Op_RShiftVS: 2049 case Op_URShiftVS: 2050 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2051 if (!VM_Version::supports_avx512bw()) { 2052 return false; // Implementation limitation 2053 } 2054 return true; 2055 2056 case Op_MulVL: 2057 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2058 if (!VM_Version::supports_avx512dq()) { 2059 return false; // Implementation limitation 2060 } 2061 return true; 2062 2063 case Op_AndV: 2064 case Op_OrV: 2065 case Op_XorV: 2066 case Op_RotateRightV: 2067 case Op_RotateLeftV: 2068 if (bt != T_INT && bt != T_LONG) { 2069 return false; // Implementation limitation 2070 } 2071 return true; 2072 2073 case Op_VectorLoadMask: 2074 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2075 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2076 return false; 2077 } 2078 return true; 2079 2080 case Op_AddVI: 2081 case Op_AddVL: 2082 case Op_AddVF: 2083 case Op_AddVD: 2084 case Op_SubVI: 2085 case Op_SubVL: 2086 case Op_SubVF: 2087 case Op_SubVD: 2088 case Op_MulVI: 2089 case Op_MulVF: 2090 case Op_MulVD: 2091 case Op_DivVF: 2092 case Op_DivVD: 2093 case Op_SqrtVF: 2094 case Op_SqrtVD: 2095 case Op_LShiftVI: 2096 case Op_LShiftVL: 2097 case Op_RShiftVI: 2098 case Op_RShiftVL: 2099 case Op_URShiftVI: 2100 case Op_URShiftVL: 2101 case Op_LoadVectorMasked: 2102 case Op_StoreVectorMasked: 2103 case Op_LoadVectorGatherMasked: 2104 case Op_StoreVectorScatterMasked: 2105 return true; 2106 2107 case Op_MaxV: 2108 case Op_MinV: 2109 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2110 return false; // Implementation limitation 2111 } 2112 if (is_floating_point_type(bt)) { 2113 return false; // Implementation limitation 2114 } 2115 return true; 2116 2117 case Op_VectorMaskCmp: 2118 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2119 return false; // Implementation limitation 2120 } 2121 return true; 2122 2123 case Op_VectorRearrange: 2124 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2125 return false; // Implementation limitation 2126 } 2127 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2128 return false; // Implementation limitation 2129 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2130 return false; // Implementation limitation 2131 } 2132 return true; 2133 2134 // Binary Logical operations 2135 case Op_AndVMask: 2136 case Op_OrVMask: 2137 case Op_XorVMask: 2138 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2139 return false; // Implementation limitation 2140 } 2141 return true; 2142 2143 case 
Op_PopCountVI: 2144 case Op_PopCountVL: 2145 if (!is_pop_count_instr_target(bt)) { 2146 return false; 2147 } 2148 return true; 2149 2150 case Op_MaskAll: 2151 return true; 2152 2153 case Op_CountLeadingZerosV: 2154 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2155 return true; 2156 } 2157 default: 2158 return false; 2159 } 2160 } 2161 2162 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2163 return false; 2164 } 2165 2166 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2167 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2168 bool legacy = (generic_opnd->opcode() == LEGVEC); 2169 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2170 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2171 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2172 return new legVecZOper(); 2173 } 2174 if (legacy) { 2175 switch (ideal_reg) { 2176 case Op_VecS: return new legVecSOper(); 2177 case Op_VecD: return new legVecDOper(); 2178 case Op_VecX: return new legVecXOper(); 2179 case Op_VecY: return new legVecYOper(); 2180 case Op_VecZ: return new legVecZOper(); 2181 } 2182 } else { 2183 switch (ideal_reg) { 2184 case Op_VecS: return new vecSOper(); 2185 case Op_VecD: return new vecDOper(); 2186 case Op_VecX: return new vecXOper(); 2187 case Op_VecY: return new vecYOper(); 2188 case Op_VecZ: return new vecZOper(); 2189 } 2190 } 2191 ShouldNotReachHere(); 2192 return nullptr; 2193 } 2194 2195 bool Matcher::is_reg2reg_move(MachNode* m) { 2196 switch (m->rule()) { 2197 case MoveVec2Leg_rule: 2198 case MoveLeg2Vec_rule: 2199 case MoveF2VL_rule: 2200 case MoveF2LEG_rule: 2201 case MoveVL2F_rule: 2202 case MoveLEG2F_rule: 2203 case MoveD2VL_rule: 2204 case MoveD2LEG_rule: 2205 case MoveVL2D_rule: 2206 case MoveLEG2D_rule: 2207 return true; 2208 default: 2209 return false; 2210 } 2211 } 2212 2213 bool Matcher::is_generic_vector(MachOper* opnd) { 2214 switch (opnd->opcode()) { 2215 case VEC: 2216 case LEGVEC: 2217 return true; 2218 default: 2219 return false; 2220 } 2221 } 2222 2223 //------------------------------------------------------------------------ 2224 2225 const RegMask* Matcher::predicate_reg_mask(void) { 2226 return &_VECTMASK_REG_mask; 2227 } 2228 2229 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2230 return new TypeVectMask(elemTy, length); 2231 } 2232 2233 // Max vector size in bytes. 0 if not supported. 2234 int Matcher::vector_width_in_bytes(BasicType bt) { 2235 assert(is_java_primitive(bt), "only primitive type vectors"); 2236 if (UseSSE < 2) return 0; 2237 // SSE2 supports 128bit vectors for all types. 2238 // AVX2 supports 256bit vectors for all types. 2239 // AVX2/EVEX supports 512bit vectors for all types. 2240 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2241 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2242 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2243 size = (UseAVX > 2) ? 64 : 32; 2244 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2245 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2246 // Use flag to limit vector size. 2247 size = MIN2(size,(int)MaxVectorSize); 2248 // Minimum 2 values in vector (or 4 for bytes). 
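  // For example, T_LONG and T_DOUBLE elements are 8 bytes, so the 2-element
  // minimum means any computed size below 16 bytes returns 0 (no vectors of
  // that type) in the switch below.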
2249 switch (bt) { 2250 case T_DOUBLE: 2251 case T_LONG: 2252 if (size < 16) return 0; 2253 break; 2254 case T_FLOAT: 2255 case T_INT: 2256 if (size < 8) return 0; 2257 break; 2258 case T_BOOLEAN: 2259 if (size < 4) return 0; 2260 break; 2261 case T_CHAR: 2262 if (size < 4) return 0; 2263 break; 2264 case T_BYTE: 2265 if (size < 4) return 0; 2266 break; 2267 case T_SHORT: 2268 if (size < 4) return 0; 2269 break; 2270 default: 2271 ShouldNotReachHere(); 2272 } 2273 return size; 2274 } 2275 2276 // Limits on vector size (number of elements) loaded into vector. 2277 int Matcher::max_vector_size(const BasicType bt) { 2278 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2279 } 2280 int Matcher::min_vector_size(const BasicType bt) { 2281 int max_size = max_vector_size(bt); 2282 // Min size which can be loaded into vector is 4 bytes. 2283 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2284 // Support for calling svml double64 vectors 2285 if (bt == T_DOUBLE) { 2286 size = 1; 2287 } 2288 return MIN2(size,max_size); 2289 } 2290 2291 int Matcher::superword_max_vector_size(const BasicType bt) { 2292 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2293 // by default on Cascade Lake 2294 if (VM_Version::is_default_intel_cascade_lake()) { 2295 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2296 } 2297 return Matcher::max_vector_size(bt); 2298 } 2299 2300 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2301 return -1; 2302 } 2303 2304 // Vector ideal reg corresponding to specified size in bytes 2305 uint Matcher::vector_ideal_reg(int size) { 2306 assert(MaxVectorSize >= size, ""); 2307 switch(size) { 2308 case 4: return Op_VecS; 2309 case 8: return Op_VecD; 2310 case 16: return Op_VecX; 2311 case 32: return Op_VecY; 2312 case 64: return Op_VecZ; 2313 } 2314 ShouldNotReachHere(); 2315 return 0; 2316 } 2317 2318 // Check for shift by small constant as well 2319 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2320 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2321 shift->in(2)->get_int() <= 3 && 2322 // Are there other uses besides address expressions? 2323 !matcher->is_visited(shift)) { 2324 address_visited.set(shift->_idx); // Flag as address_visited 2325 mstack.push(shift->in(2), Matcher::Visit); 2326 Node *conv = shift->in(1); 2327 #ifdef _LP64 2328 // Allow Matcher to match the rule which bypass 2329 // ConvI2L operation for an array index on LP64 2330 // if the index value is positive. 2331 if (conv->Opcode() == Op_ConvI2L && 2332 conv->as_Type()->type()->is_long()->_lo >= 0 && 2333 // Are there other uses besides address expressions? 2334 !matcher->is_visited(conv)) { 2335 address_visited.set(conv->_idx); // Flag as address_visited 2336 mstack.push(conv->in(1), Matcher::Pre_Visit); 2337 } else 2338 #endif 2339 mstack.push(conv, Matcher::Pre_Visit); 2340 return true; 2341 } 2342 return false; 2343 } 2344 2345 // This function identifies sub-graphs in which a 'load' node is 2346 // input to two different nodes, and such that it can be matched 2347 // with BMI instructions like blsi, blsr, etc. 2348 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2349 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2350 // refers to the same node. 2351 // 2352 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2353 // This is a temporary solution until we make DAGs expressible in ADL. 
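//
// As an illustration of the interface (the real call sites are in
// is_bmi_pattern() below), matching the blsi shape (AndI (SubI 0 LoadI) LoadI)
// rooted at an AndI node 'n' over an int load 'm' looks roughly like:
//
//   FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
//   bool is_blsi = bmii.match(Op_AndI, -1,  // op1 = AndI, commutative
//                             Op_SubI,  1,  // op2 = SubI, constant at in(1)
//                             0);           // constant value must be 0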
2354 template<typename ConType> 2355 class FusedPatternMatcher { 2356 Node* _op1_node; 2357 Node* _mop_node; 2358 int _con_op; 2359 2360 static int match_next(Node* n, int next_op, int next_op_idx) { 2361 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2362 return -1; 2363 } 2364 2365 if (next_op_idx == -1) { // n is commutative, try rotations 2366 if (n->in(1)->Opcode() == next_op) { 2367 return 1; 2368 } else if (n->in(2)->Opcode() == next_op) { 2369 return 2; 2370 } 2371 } else { 2372 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2373 if (n->in(next_op_idx)->Opcode() == next_op) { 2374 return next_op_idx; 2375 } 2376 } 2377 return -1; 2378 } 2379 2380 public: 2381 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2382 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2383 2384 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2385 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2386 typename ConType::NativeType con_value) { 2387 if (_op1_node->Opcode() != op1) { 2388 return false; 2389 } 2390 if (_mop_node->outcnt() > 2) { 2391 return false; 2392 } 2393 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2394 if (op1_op2_idx == -1) { 2395 return false; 2396 } 2397 // Memory operation must be the other edge 2398 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2399 2400 // Check that the mop node is really what we want 2401 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2402 Node* op2_node = _op1_node->in(op1_op2_idx); 2403 if (op2_node->outcnt() > 1) { 2404 return false; 2405 } 2406 assert(op2_node->Opcode() == op2, "Should be"); 2407 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2408 if (op2_con_idx == -1) { 2409 return false; 2410 } 2411 // Memory operation must be the other edge 2412 int op2_mop_idx = (op2_con_idx & 1) + 1; 2413 // Check that the memory operation is the same node 2414 if (op2_node->in(op2_mop_idx) == _mop_node) { 2415 // Now check the constant 2416 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2417 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2418 return true; 2419 } 2420 } 2421 } 2422 return false; 2423 } 2424 }; 2425 2426 static bool is_bmi_pattern(Node* n, Node* m) { 2427 assert(UseBMI1Instructions, "sanity"); 2428 if (n != nullptr && m != nullptr) { 2429 if (m->Opcode() == Op_LoadI) { 2430 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2431 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2432 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2433 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2434 } else if (m->Opcode() == Op_LoadL) { 2435 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2436 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2437 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2438 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2439 } 2440 } 2441 return false; 2442 } 2443 2444 // Should the matcher clone input 'm' of node 'n'? 2445 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2446 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2447 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2448 mstack.push(m, Visit); 2449 return true; 2450 } 2451 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2452 mstack.push(m, Visit); // m = ShiftCntV 2453 return true; 2454 } 2455 return false; 2456 } 2457 2458 // Should the Matcher clone shifts on addressing modes, expecting them 2459 // to be subsumed into complex addressing expressions or compute them 2460 // into registers? 2461 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2462 Node *off = m->in(AddPNode::Offset); 2463 if (off->is_Con()) { 2464 address_visited.test_set(m->_idx); // Flag as address_visited 2465 Node *adr = m->in(AddPNode::Address); 2466 2467 // Intel can handle 2 adds in addressing mode 2468 // AtomicAdd is not an addressing expression. 2469 // Cheap to find it by looking for screwy base. 2470 if (adr->is_AddP() && 2471 !adr->in(AddPNode::Base)->is_top() && 2472 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2473 // Are there other uses besides address expressions? 2474 !is_visited(adr)) { 2475 address_visited.set(adr->_idx); // Flag as address_visited 2476 Node *shift = adr->in(AddPNode::Offset); 2477 if (!clone_shift(shift, this, mstack, address_visited)) { 2478 mstack.push(shift, Pre_Visit); 2479 } 2480 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2481 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2482 } else { 2483 mstack.push(adr, Pre_Visit); 2484 } 2485 2486 // Clone X+offset as it also folds into most addressing expressions 2487 mstack.push(off, Visit); 2488 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2489 return true; 2490 } else if (clone_shift(off, this, mstack, address_visited)) { 2491 address_visited.test_set(m->_idx); // Flag as address_visited 2492 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2493 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2494 return true; 2495 } 2496 return false; 2497 } 2498 2499 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2500 switch (bt) { 2501 case BoolTest::eq: 2502 return Assembler::eq; 2503 case BoolTest::ne: 2504 return Assembler::neq; 2505 case BoolTest::le: 2506 case BoolTest::ule: 2507 return Assembler::le; 2508 case BoolTest::ge: 2509 case BoolTest::uge: 2510 return Assembler::nlt; 2511 case BoolTest::lt: 2512 case BoolTest::ult: 2513 return Assembler::lt; 2514 case BoolTest::gt: 2515 case BoolTest::ugt: 2516 return Assembler::nle; 2517 default : ShouldNotReachHere(); return Assembler::_false; 2518 } 2519 } 2520 2521 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2522 switch (bt) { 2523 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2524 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2525 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2526 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2527 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2528 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2529 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2530 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2531 } 2532 } 2533 2534 // Helper methods for MachSpillCopyNode::implementation(). 
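// vec_mov_helper() emits a vector register-to-register copy and
// vec_spill_helper() loads/stores a vector register from/to a stack slot.
// Both pick the encoding from the ideal register kind (Op_VecS .. Op_VecZ),
// using the EVEX extract/insert forms for 128/256-bit moves when running
// with AVX-512 but without AVX512VL; when called with a null CodeBuffer they
// only print the instruction to the outputStream (non-product builds).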
2535 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2536 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2537 assert(ireg == Op_VecS || // 32bit vector 2538 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2539 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2540 "no non-adjacent vector moves" ); 2541 if (cbuf) { 2542 C2_MacroAssembler _masm(cbuf); 2543 switch (ireg) { 2544 case Op_VecS: // copy whole register 2545 case Op_VecD: 2546 case Op_VecX: 2547 #ifndef _LP64 2548 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2549 #else 2550 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2551 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2552 } else { 2553 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2554 } 2555 #endif 2556 break; 2557 case Op_VecY: 2558 #ifndef _LP64 2559 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2560 #else 2561 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2562 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2563 } else { 2564 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2565 } 2566 #endif 2567 break; 2568 case Op_VecZ: 2569 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2570 break; 2571 default: 2572 ShouldNotReachHere(); 2573 } 2574 #ifndef PRODUCT 2575 } else { 2576 switch (ireg) { 2577 case Op_VecS: 2578 case Op_VecD: 2579 case Op_VecX: 2580 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2581 break; 2582 case Op_VecY: 2583 case Op_VecZ: 2584 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2585 break; 2586 default: 2587 ShouldNotReachHere(); 2588 } 2589 #endif 2590 } 2591 } 2592 2593 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2594 int stack_offset, int reg, uint ireg, outputStream* st) { 2595 if (cbuf) { 2596 C2_MacroAssembler _masm(cbuf); 2597 if (is_load) { 2598 switch (ireg) { 2599 case Op_VecS: 2600 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2601 break; 2602 case Op_VecD: 2603 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2604 break; 2605 case Op_VecX: 2606 #ifndef _LP64 2607 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2608 #else 2609 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2610 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2611 } else { 2612 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2613 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2614 } 2615 #endif 2616 break; 2617 case Op_VecY: 2618 #ifndef _LP64 2619 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2620 #else 2621 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2622 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2623 } else { 2624 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2625 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2626 } 2627 #endif 2628 break; 2629 case Op_VecZ: 2630 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2631 break; 2632 default: 2633 ShouldNotReachHere(); 2634 } 2635 } else { // store 2636 switch (ireg) { 2637 case Op_VecS: 2638 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2639 break; 2640 case Op_VecD: 2641 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2642 break; 2643 case Op_VecX: 2644 #ifndef _LP64 2645 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2646 #else 2647 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2648 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2649 } 2650 else { 2651 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2652 } 2653 #endif 2654 break; 2655 case Op_VecY: 2656 #ifndef _LP64 2657 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2658 #else 2659 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2660 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2661 } 2662 else { 2663 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2664 } 2665 #endif 2666 break; 2667 case Op_VecZ: 2668 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2669 break; 2670 default: 2671 ShouldNotReachHere(); 2672 } 2673 } 2674 #ifndef PRODUCT 2675 } else { 2676 if (is_load) { 2677 switch (ireg) { 2678 case Op_VecS: 2679 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2680 break; 2681 case Op_VecD: 2682 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2683 break; 2684 case Op_VecX: 2685 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2686 break; 2687 case Op_VecY: 2688 case Op_VecZ: 2689 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2690 break; 2691 default: 2692 ShouldNotReachHere(); 2693 } 2694 } else { // store 2695 switch (ireg) { 2696 case Op_VecS: 2697 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2698 break; 2699 case Op_VecD: 2700 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2701 break; 2702 case Op_VecX: 2703 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2704 break; 2705 case Op_VecY: 2706 case Op_VecZ: 2707 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2708 break; 2709 default: 2710 ShouldNotReachHere(); 2711 } 2712 } 2713 #endif 2714 } 2715 } 2716 2717 template <class T> 2718 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2719 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2720 jvalue ele; 2721 switch (bt) { 2722 case T_BYTE: ele.b = con; break; 2723 case T_SHORT: ele.s = con; break; 2724 case T_INT: ele.i = con; break; 2725 case T_LONG: ele.j = con; break; 2726 case T_FLOAT: ele.f = con; break; 2727 case T_DOUBLE: ele.d = con; break; 2728 default: ShouldNotReachHere(); 2729 } 2730 for (int i = 0; i < len; i++) { 2731 val->append(ele); 2732 } 2733 return val; 2734 } 2735 2736 static inline jlong high_bit_set(BasicType bt) { 2737 switch (bt) { 2738 case T_BYTE: 
return 0x8080808080808080; 2739 case T_SHORT: return 0x8000800080008000; 2740 case T_INT: return 0x8000000080000000; 2741 case T_LONG: return 0x8000000000000000; 2742 default: 2743 ShouldNotReachHere(); 2744 return 0; 2745 } 2746 } 2747 2748 #ifndef PRODUCT 2749 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2750 st->print("nop \t# %d bytes pad for loops and calls", _count); 2751 } 2752 #endif 2753 2754 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2755 C2_MacroAssembler _masm(&cbuf); 2756 __ nop(_count); 2757 } 2758 2759 uint MachNopNode::size(PhaseRegAlloc*) const { 2760 return _count; 2761 } 2762 2763 #ifndef PRODUCT 2764 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2765 st->print("# breakpoint"); 2766 } 2767 #endif 2768 2769 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2770 C2_MacroAssembler _masm(&cbuf); 2771 __ int3(); 2772 } 2773 2774 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2775 return MachNode::size(ra_); 2776 } 2777 2778 %} 2779 2780 encode %{ 2781 2782 enc_class call_epilog %{ 2783 C2_MacroAssembler _masm(&cbuf); 2784 if (VerifyStackAtCalls) { 2785 // Check that stack depth is unchanged: find majik cookie on stack 2786 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2787 Label L; 2788 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2789 __ jccb(Assembler::equal, L); 2790 // Die if stack mismatch 2791 __ int3(); 2792 __ bind(L); 2793 } 2794 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2795 C2_MacroAssembler _masm(&cbuf); 2796 if (!_method->signature()->returns_null_free_inline_type()) { 2797 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2798 // Search for the corresponding projection, get the register and emit code that initialized it. 2799 uint con = (tf()->range_cc()->cnt() - 1); 2800 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2801 ProjNode* proj = fast_out(i)->as_Proj(); 2802 if (proj->_con == con) { 2803 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2804 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2805 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2806 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2807 __ testq(rax, rax); 2808 __ setb(Assembler::notZero, toReg); 2809 __ movzbl(toReg, toReg); 2810 if (reg->is_stack()) { 2811 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2812 __ movq(Address(rsp, st_off), toReg); 2813 } 2814 break; 2815 } 2816 } 2817 } 2818 if (return_value_is_used()) { 2819 // An inline type is returned as fields in multiple registers. 2820 // Rax either contains an oop if the inline type is buffered or a pointer 2821 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2822 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2823 // rax &= (rax & 1) - 1 2824 __ movptr(rscratch1, rax); 2825 __ andptr(rscratch1, 0x1); 2826 __ subptr(rscratch1, 0x1); 2827 __ andptr(rax, rscratch1); 2828 } 2829 } 2830 %} 2831 2832 %} 2833 2834 // Operands for bound floating pointer register arguments 2835 operand rxmm0() %{ 2836 constraint(ALLOC_IN_RC(xmm0_reg)); 2837 match(VecX); 2838 format%{%} 2839 interface(REG_INTER); 2840 %} 2841 2842 //----------OPERANDS----------------------------------------------------------- 2843 // Operand definitions must precede instruction definitions for correct parsing 2844 // in the ADLC because operands constitute user defined types which are used in 2845 // instruction definitions. 2846 2847 // Vectors 2848 2849 // Dummy generic vector class. Should be used for all vector operands. 2850 // Replaced with vec[SDXYZ] during post-selection pass. 2851 operand vec() %{ 2852 constraint(ALLOC_IN_RC(dynamic)); 2853 match(VecX); 2854 match(VecY); 2855 match(VecZ); 2856 match(VecS); 2857 match(VecD); 2858 2859 format %{ %} 2860 interface(REG_INTER); 2861 %} 2862 2863 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2864 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2865 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2866 // runtime code generation via reg_class_dynamic. 2867 operand legVec() %{ 2868 constraint(ALLOC_IN_RC(dynamic)); 2869 match(VecX); 2870 match(VecY); 2871 match(VecZ); 2872 match(VecS); 2873 match(VecD); 2874 2875 format %{ %} 2876 interface(REG_INTER); 2877 %} 2878 2879 // Replaces vec during post-selection cleanup. See above. 2880 operand vecS() %{ 2881 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2882 match(VecS); 2883 2884 format %{ %} 2885 interface(REG_INTER); 2886 %} 2887 2888 // Replaces legVec during post-selection cleanup. See above. 2889 operand legVecS() %{ 2890 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2891 match(VecS); 2892 2893 format %{ %} 2894 interface(REG_INTER); 2895 %} 2896 2897 // Replaces vec during post-selection cleanup. See above. 2898 operand vecD() %{ 2899 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2900 match(VecD); 2901 2902 format %{ %} 2903 interface(REG_INTER); 2904 %} 2905 2906 // Replaces legVec during post-selection cleanup. See above. 2907 operand legVecD() %{ 2908 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2909 match(VecD); 2910 2911 format %{ %} 2912 interface(REG_INTER); 2913 %} 2914 2915 // Replaces vec during post-selection cleanup. See above. 2916 operand vecX() %{ 2917 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2918 match(VecX); 2919 2920 format %{ %} 2921 interface(REG_INTER); 2922 %} 2923 2924 // Replaces legVec during post-selection cleanup. See above. 2925 operand legVecX() %{ 2926 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2927 match(VecX); 2928 2929 format %{ %} 2930 interface(REG_INTER); 2931 %} 2932 2933 // Replaces vec during post-selection cleanup. See above. 2934 operand vecY() %{ 2935 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2936 match(VecY); 2937 2938 format %{ %} 2939 interface(REG_INTER); 2940 %} 2941 2942 // Replaces legVec during post-selection cleanup. See above. 2943 operand legVecY() %{ 2944 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2945 match(VecY); 2946 2947 format %{ %} 2948 interface(REG_INTER); 2949 %} 2950 2951 // Replaces vec during post-selection cleanup. See above. 
2952 operand vecZ() %{ 2953 constraint(ALLOC_IN_RC(vectorz_reg)); 2954 match(VecZ); 2955 2956 format %{ %} 2957 interface(REG_INTER); 2958 %} 2959 2960 // Replaces legVec during post-selection cleanup. See above. 2961 operand legVecZ() %{ 2962 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2963 match(VecZ); 2964 2965 format %{ %} 2966 interface(REG_INTER); 2967 %} 2968 2969 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2970 2971 // ============================================================================ 2972 2973 instruct ShouldNotReachHere() %{ 2974 match(Halt); 2975 format %{ "stop\t# ShouldNotReachHere" %} 2976 ins_encode %{ 2977 if (is_reachable()) { 2978 __ stop(_halt_reason); 2979 } 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 // ============================================================================ 2985 2986 instruct addF_reg(regF dst, regF src) %{ 2987 predicate((UseSSE>=1) && (UseAVX == 0)); 2988 match(Set dst (AddF dst src)); 2989 2990 format %{ "addss $dst, $src" %} 2991 ins_cost(150); 2992 ins_encode %{ 2993 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2994 %} 2995 ins_pipe(pipe_slow); 2996 %} 2997 2998 instruct addF_mem(regF dst, memory src) %{ 2999 predicate((UseSSE>=1) && (UseAVX == 0)); 3000 match(Set dst (AddF dst (LoadF src))); 3001 3002 format %{ "addss $dst, $src" %} 3003 ins_cost(150); 3004 ins_encode %{ 3005 __ addss($dst$$XMMRegister, $src$$Address); 3006 %} 3007 ins_pipe(pipe_slow); 3008 %} 3009 3010 instruct addF_imm(regF dst, immF con) %{ 3011 predicate((UseSSE>=1) && (UseAVX == 0)); 3012 match(Set dst (AddF dst con)); 3013 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3014 ins_cost(150); 3015 ins_encode %{ 3016 __ addss($dst$$XMMRegister, $constantaddress($con)); 3017 %} 3018 ins_pipe(pipe_slow); 3019 %} 3020 3021 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3022 predicate(UseAVX > 0); 3023 match(Set dst (AddF src1 src2)); 3024 3025 format %{ "vaddss $dst, $src1, $src2" %} 3026 ins_cost(150); 3027 ins_encode %{ 3028 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3029 %} 3030 ins_pipe(pipe_slow); 3031 %} 3032 3033 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3034 predicate(UseAVX > 0); 3035 match(Set dst (AddF src1 (LoadF src2))); 3036 3037 format %{ "vaddss $dst, $src1, $src2" %} 3038 ins_cost(150); 3039 ins_encode %{ 3040 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3041 %} 3042 ins_pipe(pipe_slow); 3043 %} 3044 3045 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3046 predicate(UseAVX > 0); 3047 match(Set dst (AddF src con)); 3048 3049 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3050 ins_cost(150); 3051 ins_encode %{ 3052 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3053 %} 3054 ins_pipe(pipe_slow); 3055 %} 3056 3057 instruct addD_reg(regD dst, regD src) %{ 3058 predicate((UseSSE>=2) && (UseAVX == 0)); 3059 match(Set dst (AddD dst src)); 3060 3061 format %{ "addsd $dst, $src" %} 3062 ins_cost(150); 3063 ins_encode %{ 3064 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3065 %} 3066 ins_pipe(pipe_slow); 3067 %} 3068 3069 instruct addD_mem(regD dst, memory src) %{ 3070 predicate((UseSSE>=2) && (UseAVX == 0)); 3071 match(Set dst (AddD dst (LoadD src))); 3072 3073 format %{ "addsd $dst, $src" %} 3074 ins_cost(150); 3075 ins_encode %{ 3076 __ addsd($dst$$XMMRegister, $src$$Address); 3077 %} 3078 ins_pipe(pipe_slow); 3079 %} 
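// Note on the scalar FP arithmetic rules in this block: the SSE forms (predicate
// 'UseAVX == 0') use the destructive two-operand encodings, so their ideal patterns
// tie the destination to the first input, e.g.
//   match(Set dst (AddD dst src));    // addsd  dst, src    => dst = dst + src
// while the AVX forms (predicate 'UseAVX > 0') use the non-destructive three-operand
// VEX encodings, e.g.
//   match(Set dst (AddD src1 src2));  // vaddsd dst, src1, src2
// The *_imm variants load their constant operand from the constant table via
// $constantaddress rather than from a register or memory operand.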
3080 3081 instruct addD_imm(regD dst, immD con) %{ 3082 predicate((UseSSE>=2) && (UseAVX == 0)); 3083 match(Set dst (AddD dst con)); 3084 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3085 ins_cost(150); 3086 ins_encode %{ 3087 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3088 %} 3089 ins_pipe(pipe_slow); 3090 %} 3091 3092 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3093 predicate(UseAVX > 0); 3094 match(Set dst (AddD src1 src2)); 3095 3096 format %{ "vaddsd $dst, $src1, $src2" %} 3097 ins_cost(150); 3098 ins_encode %{ 3099 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3100 %} 3101 ins_pipe(pipe_slow); 3102 %} 3103 3104 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3105 predicate(UseAVX > 0); 3106 match(Set dst (AddD src1 (LoadD src2))); 3107 3108 format %{ "vaddsd $dst, $src1, $src2" %} 3109 ins_cost(150); 3110 ins_encode %{ 3111 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3112 %} 3113 ins_pipe(pipe_slow); 3114 %} 3115 3116 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3117 predicate(UseAVX > 0); 3118 match(Set dst (AddD src con)); 3119 3120 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3121 ins_cost(150); 3122 ins_encode %{ 3123 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3124 %} 3125 ins_pipe(pipe_slow); 3126 %} 3127 3128 instruct subF_reg(regF dst, regF src) %{ 3129 predicate((UseSSE>=1) && (UseAVX == 0)); 3130 match(Set dst (SubF dst src)); 3131 3132 format %{ "subss $dst, $src" %} 3133 ins_cost(150); 3134 ins_encode %{ 3135 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3136 %} 3137 ins_pipe(pipe_slow); 3138 %} 3139 3140 instruct subF_mem(regF dst, memory src) %{ 3141 predicate((UseSSE>=1) && (UseAVX == 0)); 3142 match(Set dst (SubF dst (LoadF src))); 3143 3144 format %{ "subss $dst, $src" %} 3145 ins_cost(150); 3146 ins_encode %{ 3147 __ subss($dst$$XMMRegister, $src$$Address); 3148 %} 3149 ins_pipe(pipe_slow); 3150 %} 3151 3152 instruct subF_imm(regF dst, immF con) %{ 3153 predicate((UseSSE>=1) && (UseAVX == 0)); 3154 match(Set dst (SubF dst con)); 3155 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3156 ins_cost(150); 3157 ins_encode %{ 3158 __ subss($dst$$XMMRegister, $constantaddress($con)); 3159 %} 3160 ins_pipe(pipe_slow); 3161 %} 3162 3163 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3164 predicate(UseAVX > 0); 3165 match(Set dst (SubF src1 src2)); 3166 3167 format %{ "vsubss $dst, $src1, $src2" %} 3168 ins_cost(150); 3169 ins_encode %{ 3170 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3171 %} 3172 ins_pipe(pipe_slow); 3173 %} 3174 3175 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3176 predicate(UseAVX > 0); 3177 match(Set dst (SubF src1 (LoadF src2))); 3178 3179 format %{ "vsubss $dst, $src1, $src2" %} 3180 ins_cost(150); 3181 ins_encode %{ 3182 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3183 %} 3184 ins_pipe(pipe_slow); 3185 %} 3186 3187 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3188 predicate(UseAVX > 0); 3189 match(Set dst (SubF src con)); 3190 3191 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3192 ins_cost(150); 3193 ins_encode %{ 3194 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3195 %} 3196 ins_pipe(pipe_slow); 3197 %} 3198 3199 instruct subD_reg(regD dst, regD src) 
%{ 3200 predicate((UseSSE>=2) && (UseAVX == 0)); 3201 match(Set dst (SubD dst src)); 3202 3203 format %{ "subsd $dst, $src" %} 3204 ins_cost(150); 3205 ins_encode %{ 3206 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3207 %} 3208 ins_pipe(pipe_slow); 3209 %} 3210 3211 instruct subD_mem(regD dst, memory src) %{ 3212 predicate((UseSSE>=2) && (UseAVX == 0)); 3213 match(Set dst (SubD dst (LoadD src))); 3214 3215 format %{ "subsd $dst, $src" %} 3216 ins_cost(150); 3217 ins_encode %{ 3218 __ subsd($dst$$XMMRegister, $src$$Address); 3219 %} 3220 ins_pipe(pipe_slow); 3221 %} 3222 3223 instruct subD_imm(regD dst, immD con) %{ 3224 predicate((UseSSE>=2) && (UseAVX == 0)); 3225 match(Set dst (SubD dst con)); 3226 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3227 ins_cost(150); 3228 ins_encode %{ 3229 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3230 %} 3231 ins_pipe(pipe_slow); 3232 %} 3233 3234 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3235 predicate(UseAVX > 0); 3236 match(Set dst (SubD src1 src2)); 3237 3238 format %{ "vsubsd $dst, $src1, $src2" %} 3239 ins_cost(150); 3240 ins_encode %{ 3241 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3242 %} 3243 ins_pipe(pipe_slow); 3244 %} 3245 3246 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3247 predicate(UseAVX > 0); 3248 match(Set dst (SubD src1 (LoadD src2))); 3249 3250 format %{ "vsubsd $dst, $src1, $src2" %} 3251 ins_cost(150); 3252 ins_encode %{ 3253 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3254 %} 3255 ins_pipe(pipe_slow); 3256 %} 3257 3258 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3259 predicate(UseAVX > 0); 3260 match(Set dst (SubD src con)); 3261 3262 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3263 ins_cost(150); 3264 ins_encode %{ 3265 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3266 %} 3267 ins_pipe(pipe_slow); 3268 %} 3269 3270 instruct mulF_reg(regF dst, regF src) %{ 3271 predicate((UseSSE>=1) && (UseAVX == 0)); 3272 match(Set dst (MulF dst src)); 3273 3274 format %{ "mulss $dst, $src" %} 3275 ins_cost(150); 3276 ins_encode %{ 3277 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3278 %} 3279 ins_pipe(pipe_slow); 3280 %} 3281 3282 instruct mulF_mem(regF dst, memory src) %{ 3283 predicate((UseSSE>=1) && (UseAVX == 0)); 3284 match(Set dst (MulF dst (LoadF src))); 3285 3286 format %{ "mulss $dst, $src" %} 3287 ins_cost(150); 3288 ins_encode %{ 3289 __ mulss($dst$$XMMRegister, $src$$Address); 3290 %} 3291 ins_pipe(pipe_slow); 3292 %} 3293 3294 instruct mulF_imm(regF dst, immF con) %{ 3295 predicate((UseSSE>=1) && (UseAVX == 0)); 3296 match(Set dst (MulF dst con)); 3297 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3298 ins_cost(150); 3299 ins_encode %{ 3300 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3301 %} 3302 ins_pipe(pipe_slow); 3303 %} 3304 3305 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3306 predicate(UseAVX > 0); 3307 match(Set dst (MulF src1 src2)); 3308 3309 format %{ "vmulss $dst, $src1, $src2" %} 3310 ins_cost(150); 3311 ins_encode %{ 3312 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3313 %} 3314 ins_pipe(pipe_slow); 3315 %} 3316 3317 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3318 predicate(UseAVX > 0); 3319 match(Set dst (MulF src1 (LoadF src2))); 3320 3321 format %{ "vmulss $dst, $src1, $src2" %} 3322 
ins_cost(150); 3323 ins_encode %{ 3324 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3325 %} 3326 ins_pipe(pipe_slow); 3327 %} 3328 3329 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3330 predicate(UseAVX > 0); 3331 match(Set dst (MulF src con)); 3332 3333 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3334 ins_cost(150); 3335 ins_encode %{ 3336 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3337 %} 3338 ins_pipe(pipe_slow); 3339 %} 3340 3341 instruct mulD_reg(regD dst, regD src) %{ 3342 predicate((UseSSE>=2) && (UseAVX == 0)); 3343 match(Set dst (MulD dst src)); 3344 3345 format %{ "mulsd $dst, $src" %} 3346 ins_cost(150); 3347 ins_encode %{ 3348 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3349 %} 3350 ins_pipe(pipe_slow); 3351 %} 3352 3353 instruct mulD_mem(regD dst, memory src) %{ 3354 predicate((UseSSE>=2) && (UseAVX == 0)); 3355 match(Set dst (MulD dst (LoadD src))); 3356 3357 format %{ "mulsd $dst, $src" %} 3358 ins_cost(150); 3359 ins_encode %{ 3360 __ mulsd($dst$$XMMRegister, $src$$Address); 3361 %} 3362 ins_pipe(pipe_slow); 3363 %} 3364 3365 instruct mulD_imm(regD dst, immD con) %{ 3366 predicate((UseSSE>=2) && (UseAVX == 0)); 3367 match(Set dst (MulD dst con)); 3368 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3369 ins_cost(150); 3370 ins_encode %{ 3371 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3372 %} 3373 ins_pipe(pipe_slow); 3374 %} 3375 3376 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3377 predicate(UseAVX > 0); 3378 match(Set dst (MulD src1 src2)); 3379 3380 format %{ "vmulsd $dst, $src1, $src2" %} 3381 ins_cost(150); 3382 ins_encode %{ 3383 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3384 %} 3385 ins_pipe(pipe_slow); 3386 %} 3387 3388 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3389 predicate(UseAVX > 0); 3390 match(Set dst (MulD src1 (LoadD src2))); 3391 3392 format %{ "vmulsd $dst, $src1, $src2" %} 3393 ins_cost(150); 3394 ins_encode %{ 3395 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3396 %} 3397 ins_pipe(pipe_slow); 3398 %} 3399 3400 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3401 predicate(UseAVX > 0); 3402 match(Set dst (MulD src con)); 3403 3404 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3405 ins_cost(150); 3406 ins_encode %{ 3407 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3408 %} 3409 ins_pipe(pipe_slow); 3410 %} 3411 3412 instruct divF_reg(regF dst, regF src) %{ 3413 predicate((UseSSE>=1) && (UseAVX == 0)); 3414 match(Set dst (DivF dst src)); 3415 3416 format %{ "divss $dst, $src" %} 3417 ins_cost(150); 3418 ins_encode %{ 3419 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3420 %} 3421 ins_pipe(pipe_slow); 3422 %} 3423 3424 instruct divF_mem(regF dst, memory src) %{ 3425 predicate((UseSSE>=1) && (UseAVX == 0)); 3426 match(Set dst (DivF dst (LoadF src))); 3427 3428 format %{ "divss $dst, $src" %} 3429 ins_cost(150); 3430 ins_encode %{ 3431 __ divss($dst$$XMMRegister, $src$$Address); 3432 %} 3433 ins_pipe(pipe_slow); 3434 %} 3435 3436 instruct divF_imm(regF dst, immF con) %{ 3437 predicate((UseSSE>=1) && (UseAVX == 0)); 3438 match(Set dst (DivF dst con)); 3439 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3440 ins_cost(150); 3441 ins_encode %{ 3442 __ divss($dst$$XMMRegister, $constantaddress($con)); 3443 
%} 3444 ins_pipe(pipe_slow); 3445 %} 3446 3447 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3448 predicate(UseAVX > 0); 3449 match(Set dst (DivF src1 src2)); 3450 3451 format %{ "vdivss $dst, $src1, $src2" %} 3452 ins_cost(150); 3453 ins_encode %{ 3454 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3455 %} 3456 ins_pipe(pipe_slow); 3457 %} 3458 3459 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3460 predicate(UseAVX > 0); 3461 match(Set dst (DivF src1 (LoadF src2))); 3462 3463 format %{ "vdivss $dst, $src1, $src2" %} 3464 ins_cost(150); 3465 ins_encode %{ 3466 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3467 %} 3468 ins_pipe(pipe_slow); 3469 %} 3470 3471 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3472 predicate(UseAVX > 0); 3473 match(Set dst (DivF src con)); 3474 3475 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3476 ins_cost(150); 3477 ins_encode %{ 3478 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3479 %} 3480 ins_pipe(pipe_slow); 3481 %} 3482 3483 instruct divD_reg(regD dst, regD src) %{ 3484 predicate((UseSSE>=2) && (UseAVX == 0)); 3485 match(Set dst (DivD dst src)); 3486 3487 format %{ "divsd $dst, $src" %} 3488 ins_cost(150); 3489 ins_encode %{ 3490 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3491 %} 3492 ins_pipe(pipe_slow); 3493 %} 3494 3495 instruct divD_mem(regD dst, memory src) %{ 3496 predicate((UseSSE>=2) && (UseAVX == 0)); 3497 match(Set dst (DivD dst (LoadD src))); 3498 3499 format %{ "divsd $dst, $src" %} 3500 ins_cost(150); 3501 ins_encode %{ 3502 __ divsd($dst$$XMMRegister, $src$$Address); 3503 %} 3504 ins_pipe(pipe_slow); 3505 %} 3506 3507 instruct divD_imm(regD dst, immD con) %{ 3508 predicate((UseSSE>=2) && (UseAVX == 0)); 3509 match(Set dst (DivD dst con)); 3510 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3511 ins_cost(150); 3512 ins_encode %{ 3513 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3514 %} 3515 ins_pipe(pipe_slow); 3516 %} 3517 3518 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3519 predicate(UseAVX > 0); 3520 match(Set dst (DivD src1 src2)); 3521 3522 format %{ "vdivsd $dst, $src1, $src2" %} 3523 ins_cost(150); 3524 ins_encode %{ 3525 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3526 %} 3527 ins_pipe(pipe_slow); 3528 %} 3529 3530 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3531 predicate(UseAVX > 0); 3532 match(Set dst (DivD src1 (LoadD src2))); 3533 3534 format %{ "vdivsd $dst, $src1, $src2" %} 3535 ins_cost(150); 3536 ins_encode %{ 3537 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3538 %} 3539 ins_pipe(pipe_slow); 3540 %} 3541 3542 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3543 predicate(UseAVX > 0); 3544 match(Set dst (DivD src con)); 3545 3546 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3547 ins_cost(150); 3548 ins_encode %{ 3549 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3550 %} 3551 ins_pipe(pipe_slow); 3552 %} 3553 3554 instruct absF_reg(regF dst) %{ 3555 predicate((UseSSE>=1) && (UseAVX == 0)); 3556 match(Set dst (AbsF dst)); 3557 ins_cost(150); 3558 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3559 ins_encode %{ 3560 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3561 %} 3562 ins_pipe(pipe_slow); 3563 %} 3564 3565 instruct 
absF_reg_reg(vlRegF dst, vlRegF src) %{ 3566 predicate(UseAVX > 0); 3567 match(Set dst (AbsF src)); 3568 ins_cost(150); 3569 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3570 ins_encode %{ 3571 int vlen_enc = Assembler::AVX_128bit; 3572 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3573 ExternalAddress(float_signmask()), vlen_enc); 3574 %} 3575 ins_pipe(pipe_slow); 3576 %} 3577 3578 instruct absD_reg(regD dst) %{ 3579 predicate((UseSSE>=2) && (UseAVX == 0)); 3580 match(Set dst (AbsD dst)); 3581 ins_cost(150); 3582 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3583 "# abs double by sign masking" %} 3584 ins_encode %{ 3585 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3586 %} 3587 ins_pipe(pipe_slow); 3588 %} 3589 3590 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3591 predicate(UseAVX > 0); 3592 match(Set dst (AbsD src)); 3593 ins_cost(150); 3594 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3595 "# abs double by sign masking" %} 3596 ins_encode %{ 3597 int vlen_enc = Assembler::AVX_128bit; 3598 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3599 ExternalAddress(double_signmask()), vlen_enc); 3600 %} 3601 ins_pipe(pipe_slow); 3602 %} 3603 3604 instruct negF_reg(regF dst) %{ 3605 predicate((UseSSE>=1) && (UseAVX == 0)); 3606 match(Set dst (NegF dst)); 3607 ins_cost(150); 3608 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3609 ins_encode %{ 3610 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3611 %} 3612 ins_pipe(pipe_slow); 3613 %} 3614 3615 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3616 predicate(UseAVX > 0); 3617 match(Set dst (NegF src)); 3618 ins_cost(150); 3619 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3620 ins_encode %{ 3621 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3622 ExternalAddress(float_signflip())); 3623 %} 3624 ins_pipe(pipe_slow); 3625 %} 3626 3627 instruct negD_reg(regD dst) %{ 3628 predicate((UseSSE>=2) && (UseAVX == 0)); 3629 match(Set dst (NegD dst)); 3630 ins_cost(150); 3631 format %{ "xorpd $dst, [0x8000000000000000]\t" 3632 "# neg double by sign flipping" %} 3633 ins_encode %{ 3634 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3635 %} 3636 ins_pipe(pipe_slow); 3637 %} 3638 3639 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3640 predicate(UseAVX > 0); 3641 match(Set dst (NegD src)); 3642 ins_cost(150); 3643 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3644 "# neg double by sign flipping" %} 3645 ins_encode %{ 3646 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3647 ExternalAddress(double_signflip())); 3648 %} 3649 ins_pipe(pipe_slow); 3650 %} 3651 3652 // sqrtss instruction needs destination register to be pre initialized for best performance 3653 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3654 instruct sqrtF_reg(regF dst) %{ 3655 predicate(UseSSE>=1); 3656 match(Set dst (SqrtF dst)); 3657 format %{ "sqrtss $dst, $dst" %} 3658 ins_encode %{ 3659 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3660 %} 3661 ins_pipe(pipe_slow); 3662 %} 3663 3664 // sqrtsd instruction needs destination register to be pre initialized for best performance 3665 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3666 instruct sqrtD_reg(regD dst) %{ 3667 predicate(UseSSE>=2); 3668 match(Set dst (SqrtD dst)); 3669 format %{ "sqrtsd $dst, $dst" %} 3670 ins_encode %{ 3671 __ 
sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3672 %} 3673 ins_pipe(pipe_slow); 3674 %} 3675 3676 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3677 effect(TEMP tmp); 3678 match(Set dst (ConvF2HF src)); 3679 ins_cost(125); 3680 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3681 ins_encode %{ 3682 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3683 %} 3684 ins_pipe( pipe_slow ); 3685 %} 3686 3687 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3688 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3689 effect(TEMP ktmp, TEMP rtmp); 3690 match(Set mem (StoreC mem (ConvF2HF src))); 3691 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3692 ins_encode %{ 3693 __ movl($rtmp$$Register, 0x1); 3694 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3695 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3696 %} 3697 ins_pipe( pipe_slow ); 3698 %} 3699 3700 instruct vconvF2HF(vec dst, vec src) %{ 3701 match(Set dst (VectorCastF2HF src)); 3702 format %{ "vector_conv_F2HF $dst $src" %} 3703 ins_encode %{ 3704 int vlen_enc = vector_length_encoding(this, $src); 3705 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3706 %} 3707 ins_pipe( pipe_slow ); 3708 %} 3709 3710 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3711 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3712 format %{ "vcvtps2ph $mem,$src" %} 3713 ins_encode %{ 3714 int vlen_enc = vector_length_encoding(this, $src); 3715 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3716 %} 3717 ins_pipe( pipe_slow ); 3718 %} 3719 3720 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3721 match(Set dst (ConvHF2F src)); 3722 format %{ "vcvtph2ps $dst,$src" %} 3723 ins_encode %{ 3724 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3725 %} 3726 ins_pipe( pipe_slow ); 3727 %} 3728 3729 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3730 match(Set dst (VectorCastHF2F (LoadVector mem))); 3731 format %{ "vcvtph2ps $dst,$mem" %} 3732 ins_encode %{ 3733 int vlen_enc = vector_length_encoding(this); 3734 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3735 %} 3736 ins_pipe( pipe_slow ); 3737 %} 3738 3739 instruct vconvHF2F(vec dst, vec src) %{ 3740 match(Set dst (VectorCastHF2F src)); 3741 ins_cost(125); 3742 format %{ "vector_conv_HF2F $dst,$src" %} 3743 ins_encode %{ 3744 int vlen_enc = vector_length_encoding(this); 3745 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3746 %} 3747 ins_pipe( pipe_slow ); 3748 %} 3749 3750 // ---------------------------------------- VectorReinterpret ------------------------------------ 3751 instruct reinterpret_mask(kReg dst) %{ 3752 predicate(n->bottom_type()->isa_vectmask() && 3753 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3754 match(Set dst (VectorReinterpret dst)); 3755 ins_cost(125); 3756 format %{ "vector_reinterpret $dst\t!" 
%} 3757 ins_encode %{ 3758 // empty 3759 %} 3760 ins_pipe( pipe_slow ); 3761 %} 3762 3763 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3764 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3765 n->bottom_type()->isa_vectmask() && 3766 n->in(1)->bottom_type()->isa_vectmask() && 3767 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3768 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3769 match(Set dst (VectorReinterpret src)); 3770 effect(TEMP xtmp); 3771 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3772 ins_encode %{ 3773 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3774 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3775 assert(src_sz == dst_sz , "src and dst size mismatch"); 3776 int vlen_enc = vector_length_encoding(src_sz); 3777 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3778 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3779 %} 3780 ins_pipe( pipe_slow ); 3781 %} 3782 3783 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3784 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3785 n->bottom_type()->isa_vectmask() && 3786 n->in(1)->bottom_type()->isa_vectmask() && 3787 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3788 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3789 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3790 match(Set dst (VectorReinterpret src)); 3791 effect(TEMP xtmp); 3792 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3793 ins_encode %{ 3794 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3795 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3796 assert(src_sz == dst_sz , "src and dst size mismatch"); 3797 int vlen_enc = vector_length_encoding(src_sz); 3798 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3799 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3800 %} 3801 ins_pipe( pipe_slow ); 3802 %} 3803 3804 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3805 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3806 n->bottom_type()->isa_vectmask() && 3807 n->in(1)->bottom_type()->isa_vectmask() && 3808 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3809 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3810 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3811 match(Set dst (VectorReinterpret src)); 3812 effect(TEMP xtmp); 3813 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3814 ins_encode %{ 3815 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3816 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3817 assert(src_sz == dst_sz , "src and dst size mismatch"); 3818 int vlen_enc = vector_length_encoding(src_sz); 3819 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3820 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3821 %} 3822 ins_pipe( pipe_slow ); 3823 %} 3824 3825 instruct reinterpret(vec dst) %{ 3826 predicate(!n->bottom_type()->isa_vectmask() && 3827 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3828 match(Set dst (VectorReinterpret dst)); 3829 ins_cost(125); 3830 format %{ "vector_reinterpret $dst\t!" %} 3831 ins_encode %{ 3832 // empty 3833 %} 3834 ins_pipe( pipe_slow ); 3835 %} 3836 3837 instruct reinterpret_expand(vec dst, vec src) %{ 3838 predicate(UseAVX == 0 && 3839 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3840 match(Set dst (VectorReinterpret src)); 3841 ins_cost(125); 3842 effect(TEMP dst); 3843 format %{ "vector_reinterpret_expand $dst,$src" %} 3844 ins_encode %{ 3845 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3846 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3847 3848 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3849 if (src_vlen_in_bytes == 4) { 3850 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3851 } else { 3852 assert(src_vlen_in_bytes == 8, ""); 3853 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3854 } 3855 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3856 %} 3857 ins_pipe( pipe_slow ); 3858 %} 3859 3860 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3861 predicate(UseAVX > 0 && 3862 !n->bottom_type()->isa_vectmask() && 3863 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3864 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3865 match(Set dst (VectorReinterpret src)); 3866 ins_cost(125); 3867 format %{ "vector_reinterpret_expand $dst,$src" %} 3868 ins_encode %{ 3869 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3870 %} 3871 ins_pipe( pipe_slow ); 3872 %} 3873 3874 3875 instruct vreinterpret_expand(legVec dst, vec src) %{ 3876 predicate(UseAVX > 0 && 3877 !n->bottom_type()->isa_vectmask() && 3878 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3879 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3880 match(Set dst (VectorReinterpret src)); 3881 ins_cost(125); 3882 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3883 ins_encode %{ 3884 switch (Matcher::vector_length_in_bytes(this, $src)) { 3885 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3886 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3887 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3888 default: ShouldNotReachHere(); 3889 } 3890 %} 3891 ins_pipe( pipe_slow ); 3892 %} 3893 3894 instruct reinterpret_shrink(vec dst, legVec src) %{ 3895 predicate(!n->bottom_type()->isa_vectmask() && 3896 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3897 match(Set dst (VectorReinterpret src)); 3898 ins_cost(125); 3899 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3900 ins_encode %{ 3901 switch (Matcher::vector_length_in_bytes(this)) { 3902 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3903 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3904 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3905 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3906 default: ShouldNotReachHere(); 3907 } 3908 %} 3909 ins_pipe( pipe_slow ); 3910 %} 3911 3912 // ---------------------------------------------------------------------------------------------------- 3913 3914 #ifdef _LP64 3915 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3916 match(Set dst (RoundDoubleMode src rmode)); 3917 format %{ "roundsd $dst,$src" %} 3918 ins_cost(150); 3919 ins_encode %{ 3920 assert(UseSSE >= 4, "required"); 3921 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3922 %} 3923 ins_pipe(pipe_slow); 3924 %} 3925 3926 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3927 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3928 format %{ "roundsd $dst,$src" %} 3929 ins_cost(150); 3930 ins_encode %{ 3931 assert(UseSSE >= 4, "required"); 3932 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3933 %} 3934 ins_pipe(pipe_slow); 3935 %} 3936 3937 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3938 match(Set dst (RoundDoubleMode con rmode)); 3939 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3940 ins_cost(150); 3941 ins_encode %{ 3942 assert(UseSSE >= 4, "required"); 3943 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3944 %} 3945 ins_pipe(pipe_slow); 3946 %} 3947 3948 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3949 predicate(Matcher::vector_length(n) < 8); 3950 match(Set dst (RoundDoubleModeV src rmode)); 3951 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3952 ins_encode %{ 3953 assert(UseAVX > 0, "required"); 3954 int vlen_enc = vector_length_encoding(this); 3955 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3961 predicate(Matcher::vector_length(n) == 8); 3962 match(Set dst (RoundDoubleModeV src rmode)); 3963 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3964 ins_encode %{ 3965 assert(UseAVX > 2, "required"); 3966 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3967 %} 3968 ins_pipe( pipe_slow ); 3969 %} 3970 3971 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3972 predicate(Matcher::vector_length(n) < 8); 3973 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3974 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3975 ins_encode %{ 3976 assert(UseAVX > 0, "required"); 3977 int vlen_enc = vector_length_encoding(this); 3978 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3979 %} 3980 ins_pipe( pipe_slow ); 3981 %} 3982 3983 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3984 predicate(Matcher::vector_length(n) == 8); 3985 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3986 format %{ "vrndscalepd $dst,$mem,$rmode\t! 
round packed8D" %} 3987 ins_encode %{ 3988 assert(UseAVX > 2, "required"); 3989 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3990 %} 3991 ins_pipe( pipe_slow ); 3992 %} 3993 #endif // _LP64 3994 3995 instruct onspinwait() %{ 3996 match(OnSpinWait); 3997 ins_cost(200); 3998 3999 format %{ 4000 $$template 4001 $$emit$$"pause\t! membar_onspinwait" 4002 %} 4003 ins_encode %{ 4004 __ pause(); 4005 %} 4006 ins_pipe(pipe_slow); 4007 %} 4008 4009 // a * b + c 4010 instruct fmaD_reg(regD a, regD b, regD c) %{ 4011 match(Set c (FmaD c (Binary a b))); 4012 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4013 ins_cost(150); 4014 ins_encode %{ 4015 assert(UseFMA, "Needs FMA instructions support."); 4016 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4017 %} 4018 ins_pipe( pipe_slow ); 4019 %} 4020 4021 // a * b + c 4022 instruct fmaF_reg(regF a, regF b, regF c) %{ 4023 match(Set c (FmaF c (Binary a b))); 4024 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4025 ins_cost(150); 4026 ins_encode %{ 4027 assert(UseFMA, "Needs FMA instructions support."); 4028 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4029 %} 4030 ins_pipe( pipe_slow ); 4031 %} 4032 4033 // ====================VECTOR INSTRUCTIONS===================================== 4034 4035 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4036 instruct MoveVec2Leg(legVec dst, vec src) %{ 4037 match(Set dst src); 4038 format %{ "" %} 4039 ins_encode %{ 4040 ShouldNotReachHere(); 4041 %} 4042 ins_pipe( fpu_reg_reg ); 4043 %} 4044 4045 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4046 match(Set dst src); 4047 format %{ "" %} 4048 ins_encode %{ 4049 ShouldNotReachHere(); 4050 %} 4051 ins_pipe( fpu_reg_reg ); 4052 %} 4053 4054 // ============================================================================ 4055 4056 // Load vectors generic operand pattern 4057 instruct loadV(vec dst, memory mem) %{ 4058 match(Set dst (LoadVector mem)); 4059 ins_cost(125); 4060 format %{ "load_vector $dst,$mem" %} 4061 ins_encode %{ 4062 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4063 %} 4064 ins_pipe( pipe_slow ); 4065 %} 4066 4067 // Store vectors generic operand pattern. 4068 instruct storeV(memory mem, vec src) %{ 4069 match(Set mem (StoreVector mem src)); 4070 ins_cost(145); 4071 format %{ "store_vector $mem,$src\n\t" %} 4072 ins_encode %{ 4073 switch (Matcher::vector_length_in_bytes(this, $src)) { 4074 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4075 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4076 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4077 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4078 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4079 default: ShouldNotReachHere(); 4080 } 4081 %} 4082 ins_pipe( pipe_slow ); 4083 %} 4084 4085 // ---------------------------------------- Gather ------------------------------------ 4086 4087 // Gather INT, LONG, FLOAT, DOUBLE 4088 4089 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4090 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 4091 match(Set dst (LoadVectorGather mem idx)); 4092 effect(TEMP dst, TEMP tmp, TEMP mask); 4093 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %} 4094 ins_encode %{ 4095 assert(UseAVX >= 2, "sanity"); 4096 4097 int vlen_enc = vector_length_encoding(this); 4098 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4099 4100 assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity"); 4101 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4102 4103 if (vlen_enc == Assembler::AVX_128bit) { 4104 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg); 4105 } else { 4106 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg); 4107 } 4108 __ lea($tmp$$Register, $mem$$Address); 4109 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 4110 %} 4111 ins_pipe( pipe_slow ); 4112 %} 4113 4114 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 4115 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 4116 match(Set dst (LoadVectorGather mem idx)); 4117 effect(TEMP dst, TEMP tmp, TEMP ktmp); 4118 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %} 4119 ins_encode %{ 4120 assert(UseAVX > 2, "sanity"); 4121 4122 int vlen_enc = vector_length_encoding(this); 4123 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4124 4125 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4126 4127 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4128 __ lea($tmp$$Register, $mem$$Address); 4129 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4130 %} 4131 ins_pipe( pipe_slow ); 4132 %} 4133 4134 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4135 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 4136 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 4137 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %} 4138 ins_encode %{ 4139 assert(UseAVX > 2, "sanity"); 4140 int vlen_enc = vector_length_encoding(this); 4141 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4142 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4143 // Note: Since the gather instruction partially updates the opmask register used 4144 // for predication, the mask operand is moved to a temporary. 4145 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4146 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4147 __ lea($tmp$$Register, $mem$$Address); 4148 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 // ====================Scatter======================================= 4153 4154 // Scatter INT, LONG, FLOAT, DOUBLE 4155 4156 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4157 predicate(UseAVX > 2); 4158 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4159 effect(TEMP tmp, TEMP ktmp); 4160 format %{ "store_vector_scatter $mem, $idx, $src\t!
using $ktmp and $tmp as TEMP" %} 4161 ins_encode %{ 4162 int vlen_enc = vector_length_encoding(this, $src); 4163 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4164 4165 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4166 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4167 4168 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg); 4169 __ lea($tmp$$Register, $mem$$Address); 4170 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4171 %} 4172 ins_pipe( pipe_slow ); 4173 %} 4174 4175 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4176 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4177 effect(TEMP tmp, TEMP ktmp); 4178 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4179 ins_encode %{ 4180 int vlen_enc = vector_length_encoding(this, $src); 4181 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4182 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4183 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4184 // Note: Since the scatter instruction partially updates the opmask register used 4185 // for predication, the mask operand is moved to a temporary. 4186 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4187 __ lea($tmp$$Register, $mem$$Address); 4188 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4189 %} 4190 ins_pipe( pipe_slow ); 4191 %} 4192 4193 // ====================REPLICATE======================================= 4194 4195 // Replicate byte scalar to be vector 4196 instruct vReplB_reg(vec dst, rRegI src) %{ 4197 predicate(UseAVX >= 2); 4198 match(Set dst (ReplicateB src)); 4199 format %{ "replicateB $dst,$src" %} 4200 ins_encode %{ 4201 uint vlen = Matcher::vector_length(this); 4202 int vlen_enc = vector_length_encoding(this); 4203 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4204 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4205 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4206 } else { 4207 __ movdl($dst$$XMMRegister, $src$$Register); 4208 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4209 } 4210 %} 4211 ins_pipe( pipe_slow ); 4212 %} 4213 4214 instruct ReplB_reg(vec dst, rRegI src) %{ 4215 predicate(UseAVX < 2); 4216 match(Set dst (ReplicateB src)); 4217 format %{ "replicateB $dst,$src" %} 4218 ins_encode %{ 4219 uint vlen = Matcher::vector_length(this); 4220 __ movdl($dst$$XMMRegister, $src$$Register); 4221 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4222 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4223 if (vlen >= 16) { 4224 assert(vlen == 16, ""); 4225 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4226 } 4227 %} 4228 ins_pipe( pipe_slow ); 4229 %} 4230 4231 instruct ReplB_mem(vec dst, memory mem) %{ 4232 predicate(UseAVX >= 2); 4233 match(Set dst (ReplicateB (LoadB mem))); 4234 format %{ "replicateB $dst,$mem" %} 4235 ins_encode %{ 4236 int vlen_enc = vector_length_encoding(this); 4237 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4238 %} 4239 ins_pipe( pipe_slow ); 4240 %} 4241 4242 // ====================ReplicateS======================================= 4243 4244 instruct vReplS_reg(vec dst, rRegI src) %{ 4245 predicate(UseAVX >= 2); 4246 match(Set
dst (ReplicateS src)); 4247 format %{ "replicateS $dst,$src" %} 4248 ins_encode %{ 4249 uint vlen = Matcher::vector_length(this); 4250 int vlen_enc = vector_length_encoding(this); 4251 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4252 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4253 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4254 } else { 4255 __ movdl($dst$$XMMRegister, $src$$Register); 4256 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4257 } 4258 %} 4259 ins_pipe( pipe_slow ); 4260 %} 4261 4262 instruct ReplS_reg(vec dst, rRegI src) %{ 4263 predicate(UseAVX < 2); 4264 match(Set dst (ReplicateS src)); 4265 format %{ "replicateS $dst,$src" %} 4266 ins_encode %{ 4267 uint vlen = Matcher::vector_length(this); 4268 int vlen_enc = vector_length_encoding(this); 4269 __ movdl($dst$$XMMRegister, $src$$Register); 4270 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4271 if (vlen >= 8) { 4272 assert(vlen == 8, ""); 4273 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4274 } 4275 %} 4276 ins_pipe( pipe_slow ); 4277 %} 4278 4279 instruct ReplS_mem(vec dst, memory mem) %{ 4280 predicate(UseAVX >= 2); 4281 match(Set dst (ReplicateS (LoadS mem))); 4282 format %{ "replicateS $dst,$mem" %} 4283 ins_encode %{ 4284 int vlen_enc = vector_length_encoding(this); 4285 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4286 %} 4287 ins_pipe( pipe_slow ); 4288 %} 4289 4290 // ====================ReplicateI======================================= 4291 4292 instruct ReplI_reg(vec dst, rRegI src) %{ 4293 match(Set dst (ReplicateI src)); 4294 format %{ "replicateI $dst,$src" %} 4295 ins_encode %{ 4296 uint vlen = Matcher::vector_length(this); 4297 int vlen_enc = vector_length_encoding(this); 4298 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4299 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4300 } else if (VM_Version::supports_avx2()) { 4301 __ movdl($dst$$XMMRegister, $src$$Register); 4302 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4303 } else { 4304 __ movdl($dst$$XMMRegister, $src$$Register); 4305 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4306 } 4307 %} 4308 ins_pipe( pipe_slow ); 4309 %} 4310 4311 instruct ReplI_mem(vec dst, memory mem) %{ 4312 match(Set dst (ReplicateI (LoadI mem))); 4313 format %{ "replicateI $dst,$mem" %} 4314 ins_encode %{ 4315 int vlen_enc = vector_length_encoding(this); 4316 if (VM_Version::supports_avx2()) { 4317 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4318 } else if (VM_Version::supports_avx()) { 4319 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4320 } else { 4321 __ movdl($dst$$XMMRegister, $mem$$Address); 4322 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4323 } 4324 %} 4325 ins_pipe( pipe_slow ); 4326 %} 4327 4328 instruct ReplI_imm(vec dst, immI con) %{ 4329 match(Set dst (ReplicateB con)); 4330 match(Set dst (ReplicateS con)); 4331 match(Set dst (ReplicateI con)); 4332 format %{ "replicateI $dst,$con" %} 4333 ins_encode %{ 4334 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4335 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4336 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4337 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4338 BasicType bt = Matcher::vector_element_basic_type(this); 4339 int vlen = Matcher::vector_length_in_bytes(this); 4340 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4341 %} 4342 ins_pipe( pipe_slow ); 4343 %} 4344 4345 // Replicate scalar zero to be vector 4346 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4347 match(Set dst (ReplicateB zero)); 4348 match(Set dst (ReplicateS zero)); 4349 match(Set dst (ReplicateI zero)); 4350 format %{ "replicateI $dst,$zero" %} 4351 ins_encode %{ 4352 int vlen_enc = vector_length_encoding(this); 4353 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4354 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4355 } else { 4356 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4357 } 4358 %} 4359 ins_pipe( fpu_reg_reg ); 4360 %} 4361 4362 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4363 predicate(UseSSE >= 2); 4364 match(Set dst (ReplicateB con)); 4365 match(Set dst (ReplicateS con)); 4366 match(Set dst (ReplicateI con)); 4367 format %{ "vallones $dst" %} 4368 ins_encode %{ 4369 int vector_len = vector_length_encoding(this); 4370 __ vallones($dst$$XMMRegister, vector_len); 4371 %} 4372 ins_pipe( pipe_slow ); 4373 %} 4374 4375 // ====================ReplicateL======================================= 4376 4377 #ifdef _LP64 4378 // Replicate long (8 byte) scalar to be vector 4379 instruct ReplL_reg(vec dst, rRegL src) %{ 4380 match(Set dst (ReplicateL src)); 4381 format %{ "replicateL $dst,$src" %} 4382 ins_encode %{ 4383 int vlen = Matcher::vector_length(this); 4384 int vlen_enc = vector_length_encoding(this); 4385 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4386 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4387 } else if (VM_Version::supports_avx2()) { 4388 __ movdq($dst$$XMMRegister, $src$$Register); 4389 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4390 } else { 4391 __ movdq($dst$$XMMRegister, $src$$Register); 4392 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4393 } 4394 %} 4395 ins_pipe( pipe_slow ); 4396 %} 4397 #else // _LP64 4398 // Replicate long (8 byte) scalar to be vector 4399 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4400 predicate(Matcher::vector_length(n) <= 4); 4401 match(Set dst (ReplicateL src)); 4402 effect(TEMP dst, USE src, TEMP tmp); 4403 format %{ "replicateL $dst,$src" %} 4404 ins_encode %{ 4405 uint vlen = Matcher::vector_length(this); 4406 if (vlen == 2) { 4407 __ movdl($dst$$XMMRegister, $src$$Register); 4408 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4409 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4410 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4411 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4412 int vlen_enc = Assembler::AVX_256bit; 4413 __ movdl($dst$$XMMRegister, $src$$Register); 4414 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4415 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4416 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4417 } else { 4418 __ movdl($dst$$XMMRegister, $src$$Register); 4419 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4420 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4421 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4422 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4423 } 4424 %} 4425 ins_pipe( pipe_slow ); 4426 %} 4427 4428 
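// Illustrative lane contents for the 32-bit ReplicateL sequences above and below,
// where the long source is a register pair (lo = $src, hi = HIGH_FROM_LOW($src)):
//   movdl      dst, lo    -> dst = [ lo   0   0   0 ]   (32-bit lanes)
//   movdl      tmp, hi    -> tmp = [ hi   0   0   0 ]
//   punpckldq  dst, tmp   -> dst = [ lo  hi   0   0 ]   (the long now sits in lanes 0-1)
//   punpcklqdq dst, dst   -> dst = [ lo  hi  lo  hi ]   (long replicated across 128 bits)
// Wider vectors are then filled with vpbroadcastq, vinserti128_high or vinserti64x4,
// as the surrounding rules show.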
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4429 predicate(Matcher::vector_length(n) == 8); 4430 match(Set dst (ReplicateL src)); 4431 effect(TEMP dst, USE src, TEMP tmp); 4432 format %{ "replicateL $dst,$src" %} 4433 ins_encode %{ 4434 if (VM_Version::supports_avx512vl()) { 4435 __ movdl($dst$$XMMRegister, $src$$Register); 4436 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4437 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4438 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4439 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4440 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4441 } else { 4442 int vlen_enc = Assembler::AVX_512bit; 4443 __ movdl($dst$$XMMRegister, $src$$Register); 4444 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4445 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4446 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4447 } 4448 %} 4449 ins_pipe( pipe_slow ); 4450 %} 4451 #endif // _LP64 4452 4453 instruct ReplL_mem(vec dst, memory mem) %{ 4454 match(Set dst (ReplicateL (LoadL mem))); 4455 format %{ "replicateL $dst,$mem" %} 4456 ins_encode %{ 4457 int vlen_enc = vector_length_encoding(this); 4458 if (VM_Version::supports_avx2()) { 4459 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4460 } else if (VM_Version::supports_sse3()) { 4461 __ movddup($dst$$XMMRegister, $mem$$Address); 4462 } else { 4463 __ movq($dst$$XMMRegister, $mem$$Address); 4464 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4465 } 4466 %} 4467 ins_pipe( pipe_slow ); 4468 %} 4469 4470 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4471 instruct ReplL_imm(vec dst, immL con) %{ 4472 match(Set dst (ReplicateL con)); 4473 format %{ "replicateL $dst,$con" %} 4474 ins_encode %{ 4475 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4476 int vlen = Matcher::vector_length_in_bytes(this); 4477 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4478 %} 4479 ins_pipe( pipe_slow ); 4480 %} 4481 4482 instruct ReplL_zero(vec dst, immL0 zero) %{ 4483 match(Set dst (ReplicateL zero)); 4484 format %{ "replicateL $dst,$zero" %} 4485 ins_encode %{ 4486 int vlen_enc = vector_length_encoding(this); 4487 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4488 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4489 } else { 4490 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4491 } 4492 %} 4493 ins_pipe( fpu_reg_reg ); 4494 %} 4495 4496 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4497 predicate(UseSSE >= 2); 4498 match(Set dst (ReplicateL con)); 4499 format %{ "vallones $dst" %} 4500 ins_encode %{ 4501 int vector_len = vector_length_encoding(this); 4502 __ vallones($dst$$XMMRegister, vector_len); 4503 %} 4504 ins_pipe( pipe_slow ); 4505 %} 4506 4507 // ====================ReplicateF======================================= 4508 4509 instruct vReplF_reg(vec dst, vlRegF src) %{ 4510 predicate(UseAVX > 0); 4511 match(Set dst (ReplicateF src)); 4512 format %{ "replicateF $dst,$src" %} 4513 ins_encode %{ 4514 uint vlen = Matcher::vector_length(this); 4515 int vlen_enc = vector_length_encoding(this); 4516 if (vlen <= 4) { 4517 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4518 } else if (VM_Version::supports_avx2()) { 4519 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 
4520 } else { 4521 assert(vlen == 8, "sanity"); 4522 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4523 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4524 } 4525 %} 4526 ins_pipe( pipe_slow ); 4527 %} 4528 4529 instruct ReplF_reg(vec dst, vlRegF src) %{ 4530 predicate(UseAVX == 0); 4531 match(Set dst (ReplicateF src)); 4532 format %{ "replicateF $dst,$src" %} 4533 ins_encode %{ 4534 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4535 %} 4536 ins_pipe( pipe_slow ); 4537 %} 4538 4539 instruct ReplF_mem(vec dst, memory mem) %{ 4540 predicate(UseAVX > 0); 4541 match(Set dst (ReplicateF (LoadF mem))); 4542 format %{ "replicateF $dst,$mem" %} 4543 ins_encode %{ 4544 int vlen_enc = vector_length_encoding(this); 4545 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4546 %} 4547 ins_pipe( pipe_slow ); 4548 %} 4549 4550 // Replicate float scalar immediate to be vector by loading from const table. 4551 instruct ReplF_imm(vec dst, immF con) %{ 4552 match(Set dst (ReplicateF con)); 4553 format %{ "replicateF $dst,$con" %} 4554 ins_encode %{ 4555 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4556 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2)); 4557 int vlen = Matcher::vector_length_in_bytes(this); 4558 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4559 %} 4560 ins_pipe( pipe_slow ); 4561 %} 4562 4563 instruct ReplF_zero(vec dst, immF0 zero) %{ 4564 match(Set dst (ReplicateF zero)); 4565 format %{ "replicateF $dst,$zero" %} 4566 ins_encode %{ 4567 int vlen_enc = vector_length_encoding(this); 4568 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4569 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4570 } else { 4571 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4572 } 4573 %} 4574 ins_pipe( fpu_reg_reg ); 4575 %} 4576 4577 // ====================ReplicateD======================================= 4578 4579 // Replicate double (8 bytes) scalar to be vector 4580 instruct vReplD_reg(vec dst, vlRegD src) %{ 4581 predicate(UseSSE >= 3); 4582 match(Set dst (ReplicateD src)); 4583 format %{ "replicateD $dst,$src" %} 4584 ins_encode %{ 4585 uint vlen = Matcher::vector_length(this); 4586 int vlen_enc = vector_length_encoding(this); 4587 if (vlen <= 2) { 4588 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4589 } else if (VM_Version::supports_avx2()) { 4590 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4591 } else { 4592 assert(vlen == 4, "sanity"); 4593 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4594 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4595 } 4596 %} 4597 ins_pipe( pipe_slow ); 4598 %} 4599 4600 instruct ReplD_reg(vec dst, vlRegD src) %{ 4601 predicate(UseSSE < 3); 4602 match(Set dst (ReplicateD src)); 4603 format %{ "replicateD $dst,$src" %} 4604 ins_encode %{ 4605 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4606 %} 4607 ins_pipe( pipe_slow ); 4608 %} 4609 4610 instruct ReplD_mem(vec dst, memory mem) %{ 4611 predicate(UseSSE >= 3); 4612 match(Set dst (ReplicateD (LoadD mem))); 4613 format %{ "replicateD $dst,$mem" %} 4614 ins_encode %{ 4615 if (Matcher::vector_length(this) >= 4) { 4616 int vlen_enc = vector_length_encoding(this); 4617 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4618 } else { 4619 __ movddup($dst$$XMMRegister, $mem$$Address); 4620 } 4621 %} 4622 ins_pipe( pipe_slow ); 4623 %} 
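// For example (illustrative), ReplicateD of a scalar xmm register expands roughly as:
//   2 doubles (128-bit, SSE3+):   movddup      dst, src
//   4/8 doubles with AVX2+:       vbroadcastsd dst, src          (register broadcast)
//   4 doubles with AVX1 only:     movddup      dst, src
//                                 vinsertf128  dst, dst, dst, 1  (mirror low 128 bits into the high half)
// i.e. when a register-to-register broadcast is not available, the value is duplicated
// in the low 128 bits and then copied into the upper lane.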
4624 4625 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4626 instruct ReplD_imm(vec dst, immD con) %{ 4627 match(Set dst (ReplicateD con)); 4628 format %{ "replicateD $dst,$con" %} 4629 ins_encode %{ 4630 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4631 int vlen = Matcher::vector_length_in_bytes(this); 4632 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4633 %} 4634 ins_pipe( pipe_slow ); 4635 %} 4636 4637 instruct ReplD_zero(vec dst, immD0 zero) %{ 4638 match(Set dst (ReplicateD zero)); 4639 format %{ "replicateD $dst,$zero" %} 4640 ins_encode %{ 4641 int vlen_enc = vector_length_encoding(this); 4642 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4643 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4644 } else { 4645 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4646 } 4647 %} 4648 ins_pipe( fpu_reg_reg ); 4649 %} 4650 4651 // ====================VECTOR INSERT======================================= 4652 4653 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4654 predicate(Matcher::vector_length_in_bytes(n) < 32); 4655 match(Set dst (VectorInsert (Binary dst val) idx)); 4656 format %{ "vector_insert $dst,$val,$idx" %} 4657 ins_encode %{ 4658 assert(UseSSE >= 4, "required"); 4659 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4660 4661 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4662 4663 assert(is_integral_type(elem_bt), ""); 4664 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4665 4666 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4667 %} 4668 ins_pipe( pipe_slow ); 4669 %} 4670 4671 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4672 predicate(Matcher::vector_length_in_bytes(n) == 32); 4673 match(Set dst (VectorInsert (Binary src val) idx)); 4674 effect(TEMP vtmp); 4675 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4676 ins_encode %{ 4677 int vlen_enc = Assembler::AVX_256bit; 4678 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4679 int elem_per_lane = 16/type2aelembytes(elem_bt); 4680 int log2epr = log2(elem_per_lane); 4681 4682 assert(is_integral_type(elem_bt), "sanity"); 4683 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4684 4685 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4686 uint y_idx = ($idx$$constant >> log2epr) & 1; 4687 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4688 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4689 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4690 %} 4691 ins_pipe( pipe_slow ); 4692 %} 4693 4694 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4695 predicate(Matcher::vector_length_in_bytes(n) == 64); 4696 match(Set dst (VectorInsert (Binary src val) idx)); 4697 effect(TEMP vtmp); 4698 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4699 ins_encode %{ 4700 assert(UseAVX > 2, "sanity"); 4701 4702 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4703 int elem_per_lane = 16/type2aelembytes(elem_bt); 4704 int log2epr = log2(elem_per_lane); 4705 4706 assert(is_integral_type(elem_bt), ""); 4707 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4708 4709 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4710 uint y_idx = 
($idx$$constant >> log2epr) & 3; 4711 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4712 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4713 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4714 %} 4715 ins_pipe( pipe_slow ); 4716 %} 4717 4718 #ifdef _LP64 4719 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4720 predicate(Matcher::vector_length(n) == 2); 4721 match(Set dst (VectorInsert (Binary dst val) idx)); 4722 format %{ "vector_insert $dst,$val,$idx" %} 4723 ins_encode %{ 4724 assert(UseSSE >= 4, "required"); 4725 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4726 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4727 4728 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4729 %} 4730 ins_pipe( pipe_slow ); 4731 %} 4732 4733 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4734 predicate(Matcher::vector_length(n) == 4); 4735 match(Set dst (VectorInsert (Binary src val) idx)); 4736 effect(TEMP vtmp); 4737 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4738 ins_encode %{ 4739 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4740 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4741 4742 uint x_idx = $idx$$constant & right_n_bits(1); 4743 uint y_idx = ($idx$$constant >> 1) & 1; 4744 int vlen_enc = Assembler::AVX_256bit; 4745 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4746 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4747 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4748 %} 4749 ins_pipe( pipe_slow ); 4750 %} 4751 4752 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4753 predicate(Matcher::vector_length(n) == 8); 4754 match(Set dst (VectorInsert (Binary src val) idx)); 4755 effect(TEMP vtmp); 4756 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4757 ins_encode %{ 4758 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4759 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4760 4761 uint x_idx = $idx$$constant & right_n_bits(1); 4762 uint y_idx = ($idx$$constant >> 1) & 3; 4763 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4764 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4765 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4766 %} 4767 ins_pipe( pipe_slow ); 4768 %} 4769 #endif 4770 4771 instruct insertF(vec dst, regF val, immU8 idx) %{ 4772 predicate(Matcher::vector_length(n) < 8); 4773 match(Set dst (VectorInsert (Binary dst val) idx)); 4774 format %{ "vector_insert $dst,$val,$idx" %} 4775 ins_encode %{ 4776 assert(UseSSE >= 4, "sanity"); 4777 4778 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4779 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4780 4781 uint x_idx = $idx$$constant & right_n_bits(2); 4782 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4783 %} 4784 ins_pipe( pipe_slow ); 4785 %} 4786 4787 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4788 predicate(Matcher::vector_length(n) >= 8); 4789 match(Set dst (VectorInsert (Binary src val) idx)); 4790 effect(TEMP vtmp); 4791 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4792 ins_encode %{ 4793 
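    // Lane decomposition used below: one 128-bit lane holds four floats, so the
    // target element splits into x_idx = idx & 3 (slot within the lane) and
    // y_idx = idx >> 2 (which 128-bit lane). For example, with vlen == 8 and
    // idx == 6: y_idx == 1, x_idx == 2 -- extract lane 1, insertps into slot 2,
    // then re-insert lane 1.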
assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4794 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4795 4796 int vlen = Matcher::vector_length(this); 4797 uint x_idx = $idx$$constant & right_n_bits(2); 4798 if (vlen == 8) { 4799 uint y_idx = ($idx$$constant >> 2) & 1; 4800 int vlen_enc = Assembler::AVX_256bit; 4801 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4802 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4803 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4804 } else { 4805 assert(vlen == 16, "sanity"); 4806 uint y_idx = ($idx$$constant >> 2) & 3; 4807 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4808 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4809 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4810 } 4811 %} 4812 ins_pipe( pipe_slow ); 4813 %} 4814 4815 #ifdef _LP64 4816 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4817 predicate(Matcher::vector_length(n) == 2); 4818 match(Set dst (VectorInsert (Binary dst val) idx)); 4819 effect(TEMP tmp); 4820 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4821 ins_encode %{ 4822 assert(UseSSE >= 4, "sanity"); 4823 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4824 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4825 4826 __ movq($tmp$$Register, $val$$XMMRegister); 4827 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4828 %} 4829 ins_pipe( pipe_slow ); 4830 %} 4831 4832 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4833 predicate(Matcher::vector_length(n) == 4); 4834 match(Set dst (VectorInsert (Binary src val) idx)); 4835 effect(TEMP vtmp, TEMP tmp); 4836 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4837 ins_encode %{ 4838 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4839 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4840 4841 uint x_idx = $idx$$constant & right_n_bits(1); 4842 uint y_idx = ($idx$$constant >> 1) & 1; 4843 int vlen_enc = Assembler::AVX_256bit; 4844 __ movq($tmp$$Register, $val$$XMMRegister); 4845 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4846 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4847 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4848 %} 4849 ins_pipe( pipe_slow ); 4850 %} 4851 4852 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4853 predicate(Matcher::vector_length(n) == 8); 4854 match(Set dst (VectorInsert (Binary src val) idx)); 4855 effect(TEMP tmp, TEMP vtmp); 4856 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4857 ins_encode %{ 4858 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4859 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4860 4861 uint x_idx = $idx$$constant & right_n_bits(1); 4862 uint y_idx = ($idx$$constant >> 1) & 3; 4863 __ movq($tmp$$Register, $val$$XMMRegister); 4864 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4865 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4866 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4867 %} 4868 ins_pipe( pipe_slow ); 4869 %} 4870 
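// The insert2D/insert4D/insert8D rules above route the double through a GPR
// (movq) and reuse the integer (v)pinsrq path rather than a dedicated
// double-insert instruction; for the 256/512-bit cases the enclosing 128-bit
// lane is extracted and re-inserted exactly as in insert4L/insert8L. For
// example, insert8D with idx == 5: y_idx == 2, x_idx == 1 -- extract lane 2,
// vpinsrq into slot 1, re-insert lane 2.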
#endif 4871 4872 // ====================REDUCTION ARITHMETIC======================================= 4873 4874 // =======================Int Reduction========================================== 4875 4876 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4877 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 4878 match(Set dst (AddReductionVI src1 src2)); 4879 match(Set dst (MulReductionVI src1 src2)); 4880 match(Set dst (AndReductionV src1 src2)); 4881 match(Set dst ( OrReductionV src1 src2)); 4882 match(Set dst (XorReductionV src1 src2)); 4883 match(Set dst (MinReductionV src1 src2)); 4884 match(Set dst (MaxReductionV src1 src2)); 4885 effect(TEMP vtmp1, TEMP vtmp2); 4886 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4887 ins_encode %{ 4888 int opcode = this->ideal_Opcode(); 4889 int vlen = Matcher::vector_length(this, $src2); 4890 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4891 %} 4892 ins_pipe( pipe_slow ); 4893 %} 4894 4895 // =======================Long Reduction========================================== 4896 4897 #ifdef _LP64 4898 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4899 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4900 match(Set dst (AddReductionVL src1 src2)); 4901 match(Set dst (MulReductionVL src1 src2)); 4902 match(Set dst (AndReductionV src1 src2)); 4903 match(Set dst ( OrReductionV src1 src2)); 4904 match(Set dst (XorReductionV src1 src2)); 4905 match(Set dst (MinReductionV src1 src2)); 4906 match(Set dst (MaxReductionV src1 src2)); 4907 effect(TEMP vtmp1, TEMP vtmp2); 4908 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4909 ins_encode %{ 4910 int opcode = this->ideal_Opcode(); 4911 int vlen = Matcher::vector_length(this, $src2); 4912 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4913 %} 4914 ins_pipe( pipe_slow ); 4915 %} 4916 4917 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4918 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4919 match(Set dst (AddReductionVL src1 src2)); 4920 match(Set dst (MulReductionVL src1 src2)); 4921 match(Set dst (AndReductionV src1 src2)); 4922 match(Set dst ( OrReductionV src1 src2)); 4923 match(Set dst (XorReductionV src1 src2)); 4924 match(Set dst (MinReductionV src1 src2)); 4925 match(Set dst (MaxReductionV src1 src2)); 4926 effect(TEMP vtmp1, TEMP vtmp2); 4927 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4928 ins_encode %{ 4929 int opcode = this->ideal_Opcode(); 4930 int vlen = Matcher::vector_length(this, $src2); 4931 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4932 %} 4933 ins_pipe( pipe_slow ); 4934 %} 4935 #endif // _LP64 4936 4937 // =======================Float Reduction========================================== 4938 4939 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4940 predicate(Matcher::vector_length(n->in(2)) <= 4); // src 4941 match(Set dst (AddReductionVF dst src)); 4942 match(Set dst (MulReductionVF dst src)); 4943 effect(TEMP dst, TEMP vtmp); 4944 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4945 
ins_encode %{ 4946 int opcode = this->ideal_Opcode(); 4947 int vlen = Matcher::vector_length(this, $src); 4948 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4949 %} 4950 ins_pipe( pipe_slow ); 4951 %} 4952 4953 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4954 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4955 match(Set dst (AddReductionVF dst src)); 4956 match(Set dst (MulReductionVF dst src)); 4957 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4958 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4959 ins_encode %{ 4960 int opcode = this->ideal_Opcode(); 4961 int vlen = Matcher::vector_length(this, $src); 4962 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4963 %} 4964 ins_pipe( pipe_slow ); 4965 %} 4966 4967 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4968 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4969 match(Set dst (AddReductionVF dst src)); 4970 match(Set dst (MulReductionVF dst src)); 4971 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4972 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4973 ins_encode %{ 4974 int opcode = this->ideal_Opcode(); 4975 int vlen = Matcher::vector_length(this, $src); 4976 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4977 %} 4978 ins_pipe( pipe_slow ); 4979 %} 4980 4981 // =======================Double Reduction========================================== 4982 4983 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4984 predicate(Matcher::vector_length(n->in(2)) == 2); // src 4985 match(Set dst (AddReductionVD dst src)); 4986 match(Set dst (MulReductionVD dst src)); 4987 effect(TEMP dst, TEMP vtmp); 4988 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4989 ins_encode %{ 4990 int opcode = this->ideal_Opcode(); 4991 int vlen = Matcher::vector_length(this, $src); 4992 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4993 %} 4994 ins_pipe( pipe_slow ); 4995 %} 4996 4997 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4998 predicate(Matcher::vector_length(n->in(2)) == 4); // src 4999 match(Set dst (AddReductionVD dst src)); 5000 match(Set dst (MulReductionVD dst src)); 5001 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5002 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5003 ins_encode %{ 5004 int opcode = this->ideal_Opcode(); 5005 int vlen = Matcher::vector_length(this, $src); 5006 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5007 %} 5008 ins_pipe( pipe_slow ); 5009 %} 5010 5011 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5012 predicate(Matcher::vector_length(n->in(2)) == 8); // src 5013 match(Set dst (AddReductionVD dst src)); 5014 match(Set dst (MulReductionVD dst src)); 5015 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5016 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5017 ins_encode %{ 5018 int opcode = this->ideal_Opcode(); 5019 int vlen = Matcher::vector_length(this, $src); 5020 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5021 %} 5022 ins_pipe( pipe_slow ); 5023 %} 5024 5025 // =======================Byte Reduction========================================== 5026 5027 #ifdef 
_LP64 5028 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5029 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5030 match(Set dst (AddReductionVI src1 src2)); 5031 match(Set dst (AndReductionV src1 src2)); 5032 match(Set dst ( OrReductionV src1 src2)); 5033 match(Set dst (XorReductionV src1 src2)); 5034 match(Set dst (MinReductionV src1 src2)); 5035 match(Set dst (MaxReductionV src1 src2)); 5036 effect(TEMP vtmp1, TEMP vtmp2); 5037 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5038 ins_encode %{ 5039 int opcode = this->ideal_Opcode(); 5040 int vlen = Matcher::vector_length(this, $src2); 5041 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5042 %} 5043 ins_pipe( pipe_slow ); 5044 %} 5045 5046 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5047 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5048 match(Set dst (AddReductionVI src1 src2)); 5049 match(Set dst (AndReductionV src1 src2)); 5050 match(Set dst ( OrReductionV src1 src2)); 5051 match(Set dst (XorReductionV src1 src2)); 5052 match(Set dst (MinReductionV src1 src2)); 5053 match(Set dst (MaxReductionV src1 src2)); 5054 effect(TEMP vtmp1, TEMP vtmp2); 5055 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5056 ins_encode %{ 5057 int opcode = this->ideal_Opcode(); 5058 int vlen = Matcher::vector_length(this, $src2); 5059 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5060 %} 5061 ins_pipe( pipe_slow ); 5062 %} 5063 #endif 5064 5065 // =======================Short Reduction========================================== 5066 5067 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5068 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5069 match(Set dst (AddReductionVI src1 src2)); 5070 match(Set dst (MulReductionVI src1 src2)); 5071 match(Set dst (AndReductionV src1 src2)); 5072 match(Set dst ( OrReductionV src1 src2)); 5073 match(Set dst (XorReductionV src1 src2)); 5074 match(Set dst (MinReductionV src1 src2)); 5075 match(Set dst (MaxReductionV src1 src2)); 5076 effect(TEMP vtmp1, TEMP vtmp2); 5077 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5078 ins_encode %{ 5079 int opcode = this->ideal_Opcode(); 5080 int vlen = Matcher::vector_length(this, $src2); 5081 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5082 %} 5083 ins_pipe( pipe_slow ); 5084 %} 5085 5086 // =======================Mul Reduction========================================== 5087 5088 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5089 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5090 Matcher::vector_length(n->in(2)) <= 32); // src2 5091 match(Set dst (MulReductionVI src1 src2)); 5092 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5093 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5094 ins_encode %{ 5095 int opcode = this->ideal_Opcode(); 5096 int vlen = Matcher::vector_length(this, $src2); 5097 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister); 5098 %} 5099 ins_pipe( pipe_slow ); 5100 %} 5101 5102 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5103 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5104 Matcher::vector_length(n->in(2)) == 64); // src2 5105 match(Set dst (MulReductionVI src1 src2)); 5106 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5107 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5108 ins_encode %{ 5109 int opcode = this->ideal_Opcode(); 5110 int vlen = Matcher::vector_length(this, $src2); 5111 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5112 %} 5113 ins_pipe( pipe_slow ); 5114 %} 5115 5116 //--------------------Min/Max Float Reduction -------------------- 5117 // Float Min Reduction 5118 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5119 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5120 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5121 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5122 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5123 Matcher::vector_length(n->in(2)) == 2); 5124 match(Set dst (MinReductionV src1 src2)); 5125 match(Set dst (MaxReductionV src1 src2)); 5126 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5127 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5128 ins_encode %{ 5129 assert(UseAVX > 0, "sanity"); 5130 5131 int opcode = this->ideal_Opcode(); 5132 int vlen = Matcher::vector_length(this, $src2); 5133 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5134 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5135 %} 5136 ins_pipe( pipe_slow ); 5137 %} 5138 5139 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5140 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5141 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5142 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5143 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5144 Matcher::vector_length(n->in(2)) >= 4); 5145 match(Set dst (MinReductionV src1 src2)); 5146 match(Set dst (MaxReductionV src1 src2)); 5147 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5148 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5149 ins_encode %{ 5150 assert(UseAVX > 0, "sanity"); 5151 5152 int opcode = this->ideal_Opcode(); 5153 int vlen = Matcher::vector_length(this, $src2); 5154 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5155 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5156 %} 5157 ins_pipe( pipe_slow ); 5158 %} 5159 5160 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5161 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5162 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5163 Matcher::vector_length(n->in(2)) == 2); 5164 match(Set dst (MinReductionV dst src)); 5165 match(Set dst (MaxReductionV dst src)); 5166 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5167 format %{ 
"vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5168 ins_encode %{ 5169 assert(UseAVX > 0, "sanity"); 5170 5171 int opcode = this->ideal_Opcode(); 5172 int vlen = Matcher::vector_length(this, $src); 5173 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5174 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5175 %} 5176 ins_pipe( pipe_slow ); 5177 %} 5178 5179 5180 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5181 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5182 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5183 Matcher::vector_length(n->in(2)) >= 4); 5184 match(Set dst (MinReductionV dst src)); 5185 match(Set dst (MaxReductionV dst src)); 5186 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5187 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5188 ins_encode %{ 5189 assert(UseAVX > 0, "sanity"); 5190 5191 int opcode = this->ideal_Opcode(); 5192 int vlen = Matcher::vector_length(this, $src); 5193 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5194 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5195 %} 5196 ins_pipe( pipe_slow ); 5197 %} 5198 5199 5200 //--------------------Min Double Reduction -------------------- 5201 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5202 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5203 rFlagsReg cr) %{ 5204 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5205 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5206 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5207 Matcher::vector_length(n->in(2)) == 2); 5208 match(Set dst (MinReductionV src1 src2)); 5209 match(Set dst (MaxReductionV src1 src2)); 5210 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5211 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5212 ins_encode %{ 5213 assert(UseAVX > 0, "sanity"); 5214 5215 int opcode = this->ideal_Opcode(); 5216 int vlen = Matcher::vector_length(this, $src2); 5217 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5218 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5219 %} 5220 ins_pipe( pipe_slow ); 5221 %} 5222 5223 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5224 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5225 rFlagsReg cr) %{ 5226 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5227 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5228 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5229 Matcher::vector_length(n->in(2)) >= 4); 5230 match(Set dst (MinReductionV src1 src2)); 5231 match(Set dst (MaxReductionV src1 src2)); 5232 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5233 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5234 ins_encode %{ 5235 assert(UseAVX > 0, "sanity"); 5236 5237 int opcode = this->ideal_Opcode(); 5238 int vlen = Matcher::vector_length(this, $src2); 5239 __ reduceDoubleMinMax(opcode, vlen, false, 
$dst$$XMMRegister, $src2$$XMMRegister, 5240 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5241 %} 5242 ins_pipe( pipe_slow ); 5243 %} 5244 5245 5246 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5247 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5248 rFlagsReg cr) %{ 5249 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5250 Matcher::vector_length(n->in(2)) == 2); 5251 match(Set dst (MinReductionV dst src)); 5252 match(Set dst (MaxReductionV dst src)); 5253 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5254 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5255 ins_encode %{ 5256 assert(UseAVX > 0, "sanity"); 5257 5258 int opcode = this->ideal_Opcode(); 5259 int vlen = Matcher::vector_length(this, $src); 5260 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5261 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct minmax_reductionD_av(legRegD dst, legVec src, 5267 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5268 rFlagsReg cr) %{ 5269 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5270 Matcher::vector_length(n->in(2)) >= 4); 5271 match(Set dst (MinReductionV dst src)); 5272 match(Set dst (MaxReductionV dst src)); 5273 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5274 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5275 ins_encode %{ 5276 assert(UseAVX > 0, "sanity"); 5277 5278 int opcode = this->ideal_Opcode(); 5279 int vlen = Matcher::vector_length(this, $src); 5280 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5281 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5282 %} 5283 ins_pipe( pipe_slow ); 5284 %} 5285 5286 // ====================VECTOR ARITHMETIC======================================= 5287 5288 // --------------------------------- ADD -------------------------------------- 5289 5290 // Bytes vector add 5291 instruct vaddB(vec dst, vec src) %{ 5292 predicate(UseAVX == 0); 5293 match(Set dst (AddVB dst src)); 5294 format %{ "paddb $dst,$src\t! add packedB" %} 5295 ins_encode %{ 5296 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5297 %} 5298 ins_pipe( pipe_slow ); 5299 %} 5300 5301 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5302 predicate(UseAVX > 0); 5303 match(Set dst (AddVB src1 src2)); 5304 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5305 ins_encode %{ 5306 int vlen_enc = vector_length_encoding(this); 5307 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5308 %} 5309 ins_pipe( pipe_slow ); 5310 %} 5311 5312 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5313 predicate((UseAVX > 0) && 5314 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5315 match(Set dst (AddVB src (LoadVector mem))); 5316 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5317 ins_encode %{ 5318 int vlen_enc = vector_length_encoding(this); 5319 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5320 %} 5321 ins_pipe( pipe_slow ); 5322 %} 5323 5324 // Shorts/Chars vector add 5325 instruct vaddS(vec dst, vec src) %{ 5326 predicate(UseAVX == 0); 5327 match(Set dst (AddVS dst src)); 5328 format %{ "paddw $dst,$src\t! 
add packedS" %} 5329 ins_encode %{ 5330 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5331 %} 5332 ins_pipe( pipe_slow ); 5333 %} 5334 5335 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5336 predicate(UseAVX > 0); 5337 match(Set dst (AddVS src1 src2)); 5338 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5339 ins_encode %{ 5340 int vlen_enc = vector_length_encoding(this); 5341 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5342 %} 5343 ins_pipe( pipe_slow ); 5344 %} 5345 5346 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5347 predicate((UseAVX > 0) && 5348 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5349 match(Set dst (AddVS src (LoadVector mem))); 5350 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5351 ins_encode %{ 5352 int vlen_enc = vector_length_encoding(this); 5353 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5354 %} 5355 ins_pipe( pipe_slow ); 5356 %} 5357 5358 // Integers vector add 5359 instruct vaddI(vec dst, vec src) %{ 5360 predicate(UseAVX == 0); 5361 match(Set dst (AddVI dst src)); 5362 format %{ "paddd $dst,$src\t! add packedI" %} 5363 ins_encode %{ 5364 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5365 %} 5366 ins_pipe( pipe_slow ); 5367 %} 5368 5369 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5370 predicate(UseAVX > 0); 5371 match(Set dst (AddVI src1 src2)); 5372 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5373 ins_encode %{ 5374 int vlen_enc = vector_length_encoding(this); 5375 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5376 %} 5377 ins_pipe( pipe_slow ); 5378 %} 5379 5380 5381 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5382 predicate((UseAVX > 0) && 5383 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5384 match(Set dst (AddVI src (LoadVector mem))); 5385 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5386 ins_encode %{ 5387 int vlen_enc = vector_length_encoding(this); 5388 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5389 %} 5390 ins_pipe( pipe_slow ); 5391 %} 5392 5393 // Longs vector add 5394 instruct vaddL(vec dst, vec src) %{ 5395 predicate(UseAVX == 0); 5396 match(Set dst (AddVL dst src)); 5397 format %{ "paddq $dst,$src\t! add packedL" %} 5398 ins_encode %{ 5399 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5400 %} 5401 ins_pipe( pipe_slow ); 5402 %} 5403 5404 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5405 predicate(UseAVX > 0); 5406 match(Set dst (AddVL src1 src2)); 5407 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5408 ins_encode %{ 5409 int vlen_enc = vector_length_encoding(this); 5410 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5411 %} 5412 ins_pipe( pipe_slow ); 5413 %} 5414 5415 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5416 predicate((UseAVX > 0) && 5417 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5418 match(Set dst (AddVL src (LoadVector mem))); 5419 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5420 ins_encode %{ 5421 int vlen_enc = vector_length_encoding(this); 5422 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5423 %} 5424 ins_pipe( pipe_slow ); 5425 %} 5426 5427 // Floats vector add 5428 instruct vaddF(vec dst, vec src) %{ 5429 predicate(UseAVX == 0); 5430 match(Set dst (AddVF dst src)); 5431 format %{ "addps $dst,$src\t! 
add packedF" %} 5432 ins_encode %{ 5433 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5434 %} 5435 ins_pipe( pipe_slow ); 5436 %} 5437 5438 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5439 predicate(UseAVX > 0); 5440 match(Set dst (AddVF src1 src2)); 5441 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5442 ins_encode %{ 5443 int vlen_enc = vector_length_encoding(this); 5444 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5445 %} 5446 ins_pipe( pipe_slow ); 5447 %} 5448 5449 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5450 predicate((UseAVX > 0) && 5451 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5452 match(Set dst (AddVF src (LoadVector mem))); 5453 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5454 ins_encode %{ 5455 int vlen_enc = vector_length_encoding(this); 5456 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 // Doubles vector add 5462 instruct vaddD(vec dst, vec src) %{ 5463 predicate(UseAVX == 0); 5464 match(Set dst (AddVD dst src)); 5465 format %{ "addpd $dst,$src\t! add packedD" %} 5466 ins_encode %{ 5467 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5468 %} 5469 ins_pipe( pipe_slow ); 5470 %} 5471 5472 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5473 predicate(UseAVX > 0); 5474 match(Set dst (AddVD src1 src2)); 5475 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5476 ins_encode %{ 5477 int vlen_enc = vector_length_encoding(this); 5478 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5479 %} 5480 ins_pipe( pipe_slow ); 5481 %} 5482 5483 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5484 predicate((UseAVX > 0) && 5485 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5486 match(Set dst (AddVD src (LoadVector mem))); 5487 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5488 ins_encode %{ 5489 int vlen_enc = vector_length_encoding(this); 5490 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5491 %} 5492 ins_pipe( pipe_slow ); 5493 %} 5494 5495 // --------------------------------- SUB -------------------------------------- 5496 5497 // Bytes vector sub 5498 instruct vsubB(vec dst, vec src) %{ 5499 predicate(UseAVX == 0); 5500 match(Set dst (SubVB dst src)); 5501 format %{ "psubb $dst,$src\t! sub packedB" %} 5502 ins_encode %{ 5503 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5509 predicate(UseAVX > 0); 5510 match(Set dst (SubVB src1 src2)); 5511 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5512 ins_encode %{ 5513 int vlen_enc = vector_length_encoding(this); 5514 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5515 %} 5516 ins_pipe( pipe_slow ); 5517 %} 5518 5519 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5520 predicate((UseAVX > 0) && 5521 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5522 match(Set dst (SubVB src (LoadVector mem))); 5523 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5524 ins_encode %{ 5525 int vlen_enc = vector_length_encoding(this); 5526 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5527 %} 5528 ins_pipe( pipe_slow ); 5529 %} 5530 5531 // Shorts/Chars vector sub 5532 instruct vsubS(vec dst, vec src) %{ 5533 predicate(UseAVX == 0); 5534 match(Set dst (SubVS dst src)); 5535 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5536 ins_encode %{ 5537 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5538 %} 5539 ins_pipe( pipe_slow ); 5540 %} 5541 5542 5543 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5544 predicate(UseAVX > 0); 5545 match(Set dst (SubVS src1 src2)); 5546 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5547 ins_encode %{ 5548 int vlen_enc = vector_length_encoding(this); 5549 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5550 %} 5551 ins_pipe( pipe_slow ); 5552 %} 5553 5554 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5555 predicate((UseAVX > 0) && 5556 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5557 match(Set dst (SubVS src (LoadVector mem))); 5558 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5559 ins_encode %{ 5560 int vlen_enc = vector_length_encoding(this); 5561 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5562 %} 5563 ins_pipe( pipe_slow ); 5564 %} 5565 5566 // Integers vector sub 5567 instruct vsubI(vec dst, vec src) %{ 5568 predicate(UseAVX == 0); 5569 match(Set dst (SubVI dst src)); 5570 format %{ "psubd $dst,$src\t! sub packedI" %} 5571 ins_encode %{ 5572 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5573 %} 5574 ins_pipe( pipe_slow ); 5575 %} 5576 5577 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5578 predicate(UseAVX > 0); 5579 match(Set dst (SubVI src1 src2)); 5580 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5581 ins_encode %{ 5582 int vlen_enc = vector_length_encoding(this); 5583 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5584 %} 5585 ins_pipe( pipe_slow ); 5586 %} 5587 5588 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5589 predicate((UseAVX > 0) && 5590 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5591 match(Set dst (SubVI src (LoadVector mem))); 5592 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5593 ins_encode %{ 5594 int vlen_enc = vector_length_encoding(this); 5595 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5596 %} 5597 ins_pipe( pipe_slow ); 5598 %} 5599 5600 // Longs vector sub 5601 instruct vsubL(vec dst, vec src) %{ 5602 predicate(UseAVX == 0); 5603 match(Set dst (SubVL dst src)); 5604 format %{ "psubq $dst,$src\t! sub packedL" %} 5605 ins_encode %{ 5606 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5607 %} 5608 ins_pipe( pipe_slow ); 5609 %} 5610 5611 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5612 predicate(UseAVX > 0); 5613 match(Set dst (SubVL src1 src2)); 5614 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5615 ins_encode %{ 5616 int vlen_enc = vector_length_encoding(this); 5617 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5618 %} 5619 ins_pipe( pipe_slow ); 5620 %} 5621 5622 5623 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5624 predicate((UseAVX > 0) && 5625 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5626 match(Set dst (SubVL src (LoadVector mem))); 5627 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5628 ins_encode %{ 5629 int vlen_enc = vector_length_encoding(this); 5630 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5631 %} 5632 ins_pipe( pipe_slow ); 5633 %} 5634 5635 // Floats vector sub 5636 instruct vsubF(vec dst, vec src) %{ 5637 predicate(UseAVX == 0); 5638 match(Set dst (SubVF dst src)); 5639 format %{ "subps $dst,$src\t! 
sub packedF" %} 5640 ins_encode %{ 5641 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5642 %} 5643 ins_pipe( pipe_slow ); 5644 %} 5645 5646 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5647 predicate(UseAVX > 0); 5648 match(Set dst (SubVF src1 src2)); 5649 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5650 ins_encode %{ 5651 int vlen_enc = vector_length_encoding(this); 5652 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5653 %} 5654 ins_pipe( pipe_slow ); 5655 %} 5656 5657 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5658 predicate((UseAVX > 0) && 5659 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5660 match(Set dst (SubVF src (LoadVector mem))); 5661 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5662 ins_encode %{ 5663 int vlen_enc = vector_length_encoding(this); 5664 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 // Doubles vector sub 5670 instruct vsubD(vec dst, vec src) %{ 5671 predicate(UseAVX == 0); 5672 match(Set dst (SubVD dst src)); 5673 format %{ "subpd $dst,$src\t! sub packedD" %} 5674 ins_encode %{ 5675 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5676 %} 5677 ins_pipe( pipe_slow ); 5678 %} 5679 5680 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5681 predicate(UseAVX > 0); 5682 match(Set dst (SubVD src1 src2)); 5683 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5684 ins_encode %{ 5685 int vlen_enc = vector_length_encoding(this); 5686 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5687 %} 5688 ins_pipe( pipe_slow ); 5689 %} 5690 5691 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5692 predicate((UseAVX > 0) && 5693 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5694 match(Set dst (SubVD src (LoadVector mem))); 5695 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5696 ins_encode %{ 5697 int vlen_enc = vector_length_encoding(this); 5698 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5699 %} 5700 ins_pipe( pipe_slow ); 5701 %} 5702 5703 // --------------------------------- MUL -------------------------------------- 5704 5705 // Byte vector mul 5706 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5707 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5708 match(Set dst (MulVB src1 src2)); 5709 effect(TEMP dst, TEMP xtmp); 5710 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5711 ins_encode %{ 5712 assert(UseSSE > 3, "required"); 5713 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5714 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5715 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5716 __ psllw($dst$$XMMRegister, 8); 5717 __ psrlw($dst$$XMMRegister, 8); 5718 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5719 %} 5720 ins_pipe( pipe_slow ); 5721 %} 5722 5723 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5724 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5725 match(Set dst (MulVB src1 src2)); 5726 effect(TEMP dst, TEMP xtmp); 5727 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5728 ins_encode %{ 5729 assert(UseSSE > 3, "required"); 5730 // Odd-index elements 5731 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5732 __ psrlw($dst$$XMMRegister, 8); 5733 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5734 __ psrlw($xtmp$$XMMRegister, 8); 5735 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5736 __ psllw($dst$$XMMRegister, 8); 5737 // Even-index elements 5738 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5739 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5740 __ psllw($xtmp$$XMMRegister, 8); 5741 __ psrlw($xtmp$$XMMRegister, 8); 5742 // Combine 5743 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5744 %} 5745 ins_pipe( pipe_slow ); 5746 %} 5747 5748 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5749 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5750 match(Set dst (MulVB src1 src2)); 5751 effect(TEMP xtmp1, TEMP xtmp2); 5752 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5753 ins_encode %{ 5754 int vlen_enc = vector_length_encoding(this); 5755 // Odd-index elements 5756 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5757 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5758 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5759 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5760 // Even-index elements 5761 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5762 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5763 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5764 // Combine 5765 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5766 %} 5767 ins_pipe( pipe_slow ); 5768 %} 5769 5770 // Shorts/Chars vector mul 5771 instruct vmulS(vec dst, vec src) %{ 5772 predicate(UseAVX == 0); 5773 match(Set dst (MulVS dst src)); 5774 format %{ "pmullw $dst,$src\t! mul packedS" %} 5775 ins_encode %{ 5776 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5777 %} 5778 ins_pipe( pipe_slow ); 5779 %} 5780 5781 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5782 predicate(UseAVX > 0); 5783 match(Set dst (MulVS src1 src2)); 5784 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5785 ins_encode %{ 5786 int vlen_enc = vector_length_encoding(this); 5787 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5793 predicate((UseAVX > 0) && 5794 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5795 match(Set dst (MulVS src (LoadVector mem))); 5796 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5797 ins_encode %{ 5798 int vlen_enc = vector_length_encoding(this); 5799 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5800 %} 5801 ins_pipe( pipe_slow ); 5802 %} 5803 5804 // Integers vector mul 5805 instruct vmulI(vec dst, vec src) %{ 5806 predicate(UseAVX == 0); 5807 match(Set dst (MulVI dst src)); 5808 format %{ "pmulld $dst,$src\t! mul packedI" %} 5809 ins_encode %{ 5810 assert(UseSSE > 3, "required"); 5811 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5812 %} 5813 ins_pipe( pipe_slow ); 5814 %} 5815 5816 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5817 predicate(UseAVX > 0); 5818 match(Set dst (MulVI src1 src2)); 5819 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packedI" %} 5820 ins_encode %{ 5821 int vlen_enc = vector_length_encoding(this); 5822 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5823 %} 5824 ins_pipe( pipe_slow ); 5825 %} 5826 5827 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5828 predicate((UseAVX > 0) && 5829 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5830 match(Set dst (MulVI src (LoadVector mem))); 5831 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5832 ins_encode %{ 5833 int vlen_enc = vector_length_encoding(this); 5834 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5835 %} 5836 ins_pipe( pipe_slow ); 5837 %} 5838 5839 // Longs vector mul 5840 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 5841 predicate((Matcher::vector_length_in_bytes(n) == 64 && 5842 VM_Version::supports_avx512dq()) || 5843 VM_Version::supports_avx512vldq()); 5844 match(Set dst (MulVL src1 src2)); 5845 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 5846 ins_encode %{ 5847 assert(UseAVX > 2, "required"); 5848 int vlen_enc = vector_length_encoding(this); 5849 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5850 %} 5851 ins_pipe( pipe_slow ); 5852 %} 5853 5854 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 5855 predicate((Matcher::vector_length_in_bytes(n) == 64 && 5856 VM_Version::supports_avx512dq()) || 5857 (Matcher::vector_length_in_bytes(n) > 8 && 5858 VM_Version::supports_avx512vldq())); 5859 match(Set dst (MulVL src (LoadVector mem))); 5860 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 5861 ins_encode %{ 5862 assert(UseAVX > 2, "required"); 5863 int vlen_enc = vector_length_encoding(this); 5864 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5865 %} 5866 ins_pipe( pipe_slow ); 5867 %} 5868 5869 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 5870 predicate(UseAVX == 0); 5871 match(Set dst (MulVL src1 src2)); 5872 effect(TEMP dst, TEMP xtmp); 5873 format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5874 ins_encode %{ 5875 assert(VM_Version::supports_sse4_1(), "required"); 5876 // Get the lo-hi products, only the lower 32 bits is in concerns 5877 __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1); 5878 __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister); 5879 __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1); 5880 __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister); 5881 __ psllq($dst$$XMMRegister, 32); 5882 // Get the lo-lo products 5883 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5884 __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister); 5885 __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister); 5886 %} 5887 ins_pipe( pipe_slow ); 5888 %} 5889 5890 instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5891 predicate(UseAVX > 0 && 5892 ((Matcher::vector_length_in_bytes(n) == 64 && 5893 !VM_Version::supports_avx512dq()) || 5894 (Matcher::vector_length_in_bytes(n) < 64 && 5895 !VM_Version::supports_avx512vldq()))); 5896 match(Set dst (MulVL src1 src2)); 5897 effect(TEMP xtmp1, TEMP xtmp2); 5898 format %{ "vmulVL $dst, $src1, $src2\t! 
using $xtmp1, $xtmp2 as TEMP" %} 5899 ins_encode %{ 5900 int vlen_enc = vector_length_encoding(this); 5901 // Get the lo-hi products, only the lower 32 bits is in concerns 5902 __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc); 5903 __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 5904 __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc); 5905 __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc); 5906 __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc); 5907 // Get the lo-lo products 5908 __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5909 __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5910 %} 5911 ins_pipe( pipe_slow ); 5912 %} 5913 5914 // Floats vector mul 5915 instruct vmulF(vec dst, vec src) %{ 5916 predicate(UseAVX == 0); 5917 match(Set dst (MulVF dst src)); 5918 format %{ "mulps $dst,$src\t! mul packedF" %} 5919 ins_encode %{ 5920 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5926 predicate(UseAVX > 0); 5927 match(Set dst (MulVF src1 src2)); 5928 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5929 ins_encode %{ 5930 int vlen_enc = vector_length_encoding(this); 5931 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5932 %} 5933 ins_pipe( pipe_slow ); 5934 %} 5935 5936 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5937 predicate((UseAVX > 0) && 5938 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5939 match(Set dst (MulVF src (LoadVector mem))); 5940 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5941 ins_encode %{ 5942 int vlen_enc = vector_length_encoding(this); 5943 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5944 %} 5945 ins_pipe( pipe_slow ); 5946 %} 5947 5948 // Doubles vector mul 5949 instruct vmulD(vec dst, vec src) %{ 5950 predicate(UseAVX == 0); 5951 match(Set dst (MulVD dst src)); 5952 format %{ "mulpd $dst,$src\t! mul packedD" %} 5953 ins_encode %{ 5954 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5955 %} 5956 ins_pipe( pipe_slow ); 5957 %} 5958 5959 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5960 predicate(UseAVX > 0); 5961 match(Set dst (MulVD src1 src2)); 5962 format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %} 5963 ins_encode %{ 5964 int vlen_enc = vector_length_encoding(this); 5965 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5966 %} 5967 ins_pipe( pipe_slow ); 5968 %} 5969 5970 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5971 predicate((UseAVX > 0) && 5972 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5973 match(Set dst (MulVD src (LoadVector mem))); 5974 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5975 ins_encode %{ 5976 int vlen_enc = vector_length_encoding(this); 5977 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 5982 // --------------------------------- DIV -------------------------------------- 5983 5984 // Floats vector div 5985 instruct vdivF(vec dst, vec src) %{ 5986 predicate(UseAVX == 0); 5987 match(Set dst (DivVF dst src)); 5988 format %{ "divps $dst,$src\t! 
div packedF" %} 5989 ins_encode %{ 5990 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5991 %} 5992 ins_pipe( pipe_slow ); 5993 %} 5994 5995 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5996 predicate(UseAVX > 0); 5997 match(Set dst (DivVF src1 src2)); 5998 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5999 ins_encode %{ 6000 int vlen_enc = vector_length_encoding(this); 6001 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6002 %} 6003 ins_pipe( pipe_slow ); 6004 %} 6005 6006 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6007 predicate((UseAVX > 0) && 6008 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6009 match(Set dst (DivVF src (LoadVector mem))); 6010 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6011 ins_encode %{ 6012 int vlen_enc = vector_length_encoding(this); 6013 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6014 %} 6015 ins_pipe( pipe_slow ); 6016 %} 6017 6018 // Doubles vector div 6019 instruct vdivD(vec dst, vec src) %{ 6020 predicate(UseAVX == 0); 6021 match(Set dst (DivVD dst src)); 6022 format %{ "divpd $dst,$src\t! div packedD" %} 6023 ins_encode %{ 6024 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6025 %} 6026 ins_pipe( pipe_slow ); 6027 %} 6028 6029 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6030 predicate(UseAVX > 0); 6031 match(Set dst (DivVD src1 src2)); 6032 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6033 ins_encode %{ 6034 int vlen_enc = vector_length_encoding(this); 6035 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6036 %} 6037 ins_pipe( pipe_slow ); 6038 %} 6039 6040 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6041 predicate((UseAVX > 0) && 6042 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6043 match(Set dst (DivVD src (LoadVector mem))); 6044 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6045 ins_encode %{ 6046 int vlen_enc = vector_length_encoding(this); 6047 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6048 %} 6049 ins_pipe( pipe_slow ); 6050 %} 6051 6052 // ------------------------------ MinMax --------------------------------------- 6053 6054 // Byte, Short, Int vector Min/Max 6055 instruct minmax_reg_sse(vec dst, vec src) %{ 6056 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6057 UseAVX == 0); 6058 match(Set dst (MinV dst src)); 6059 match(Set dst (MaxV dst src)); 6060 format %{ "vector_minmax $dst,$src\t! " %} 6061 ins_encode %{ 6062 assert(UseSSE >= 4, "required"); 6063 6064 int opcode = this->ideal_Opcode(); 6065 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6066 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6067 %} 6068 ins_pipe( pipe_slow ); 6069 %} 6070 6071 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6072 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6073 UseAVX > 0); 6074 match(Set dst (MinV src1 src2)); 6075 match(Set dst (MaxV src1 src2)); 6076 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 6077 ins_encode %{ 6078 int opcode = this->ideal_Opcode(); 6079 int vlen_enc = vector_length_encoding(this); 6080 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6081 6082 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6083 %} 6084 ins_pipe( pipe_slow ); 6085 %} 6086 6087 // Long vector Min/Max 6088 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6089 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6090 UseAVX == 0); 6091 match(Set dst (MinV dst src)); 6092 match(Set dst (MaxV src dst)); 6093 effect(TEMP dst, TEMP tmp); 6094 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6095 ins_encode %{ 6096 assert(UseSSE >= 4, "required"); 6097 6098 int opcode = this->ideal_Opcode(); 6099 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6100 assert(elem_bt == T_LONG, "sanity"); 6101 6102 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6103 %} 6104 ins_pipe( pipe_slow ); 6105 %} 6106 6107 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6108 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6109 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6110 match(Set dst (MinV src1 src2)); 6111 match(Set dst (MaxV src1 src2)); 6112 effect(TEMP dst); 6113 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6114 ins_encode %{ 6115 int vlen_enc = vector_length_encoding(this); 6116 int opcode = this->ideal_Opcode(); 6117 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6118 assert(elem_bt == T_LONG, "sanity"); 6119 6120 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6121 %} 6122 ins_pipe( pipe_slow ); 6123 %} 6124 6125 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6126 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6127 Matcher::vector_element_basic_type(n) == T_LONG); 6128 match(Set dst (MinV src1 src2)); 6129 match(Set dst (MaxV src1 src2)); 6130 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6131 ins_encode %{ 6132 assert(UseAVX > 2, "required"); 6133 6134 int vlen_enc = vector_length_encoding(this); 6135 int opcode = this->ideal_Opcode(); 6136 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6137 assert(elem_bt == T_LONG, "sanity"); 6138 6139 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6140 %} 6141 ins_pipe( pipe_slow ); 6142 %} 6143 6144 // Float/Double vector Min/Max 6145 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6146 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6147 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6148 UseAVX > 0); 6149 match(Set dst (MinV a b)); 6150 match(Set dst (MaxV a b)); 6151 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6152 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6153 ins_encode %{ 6154 assert(UseAVX > 0, "required"); 6155 6156 int opcode = this->ideal_Opcode(); 6157 int vlen_enc = vector_length_encoding(this); 6158 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6159 6160 __ vminmax_fp(opcode, elem_bt, 6161 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6162 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6163 %} 6164 ins_pipe( pipe_slow ); 6165 %} 6166 6167 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6168 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6169 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6170 match(Set dst (MinV a b)); 6171 match(Set dst (MaxV a b)); 6172 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6173 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6174 ins_encode %{ 6175 assert(UseAVX > 2, "required"); 6176 6177 int opcode = this->ideal_Opcode(); 6178 int vlen_enc = vector_length_encoding(this); 6179 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6180 6181 __ evminmax_fp(opcode, elem_bt, 6182 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6183 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6184 %} 6185 ins_pipe( pipe_slow ); 6186 %} 6187 6188 // --------------------------------- Signum/CopySign --------------------------- 6189 6190 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6191 match(Set dst (SignumF dst (Binary zero one))); 6192 effect(KILL cr); 6193 format %{ "signumF $dst, $dst" %} 6194 ins_encode %{ 6195 int opcode = this->ideal_Opcode(); 6196 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6197 %} 6198 ins_pipe( pipe_slow ); 6199 %} 6200 6201 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6202 match(Set dst (SignumD dst (Binary zero one))); 6203 effect(KILL cr); 6204 format %{ "signumD $dst, $dst" %} 6205 ins_encode %{ 6206 int opcode = this->ideal_Opcode(); 6207 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6208 %} 6209 ins_pipe( pipe_slow ); 6210 %} 6211 6212 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6213 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6214 match(Set dst (SignumVF src (Binary zero one))); 6215 match(Set dst (SignumVD src (Binary zero one))); 6216 effect(TEMP dst, TEMP xtmp1); 6217 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6218 ins_encode %{ 6219 int opcode = this->ideal_Opcode(); 6220 int vec_enc = vector_length_encoding(this); 6221 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6222 $xtmp1$$XMMRegister, vec_enc); 6223 %} 6224 ins_pipe( pipe_slow ); 6225 %} 6226 6227 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6228 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6229 match(Set dst (SignumVF src (Binary zero one))); 6230 match(Set dst (SignumVD src (Binary zero one))); 6231 effect(TEMP dst, TEMP ktmp1); 6232 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6233 ins_encode %{ 6234 int opcode = this->ideal_Opcode(); 6235 int vec_enc = vector_length_encoding(this); 6236 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6237 $ktmp1$$KRegister, vec_enc); 6238 %} 6239 ins_pipe( pipe_slow ); 6240 %} 6241 6242 // --------------------------------------- 6243 // For copySign use 0xE4 as writemask for vpternlog 6244 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6245 // C (xmm2) is set to 0x7FFFFFFF 6246 // Wherever xmm2 is 0, we want to pick from B (sign) 6247 // Wherever xmm2 is 1, we want to pick from A (src) 6248 // 6249 // A B C Result 6250 // 0 0 0 0 6251 // 0 0 1 0 6252 // 0 1 0 1 6253 // 0 1 1 0 6254 // 1 0 0 0 6255 // 1 0 1 1 6256 // 1 1 0 1 6257 // 1 1 1 1 6258 // 6259 // Result going from high bit to low bit is 0x11100100 = 0xe4 6260 // --------------------------------------- 6261 6262 #ifdef _LP64 6263 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6264 match(Set dst (CopySignF dst src)); 6265 effect(TEMP tmp1, TEMP tmp2); 6266 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6267 ins_encode %{ 6268 __ movl($tmp2$$Register, 0x7FFFFFFF); 6269 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6270 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6271 %} 6272 ins_pipe( pipe_slow ); 6273 %} 6274 6275 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6276 match(Set dst (CopySignD dst (Binary src zero))); 6277 ins_cost(100); 6278 effect(TEMP tmp1, TEMP tmp2); 6279 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6280 ins_encode %{ 6281 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6282 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6283 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6284 %} 6285 ins_pipe( pipe_slow ); 6286 %} 6287 6288 #endif // _LP64 6289 6290 //----------------------------- CompressBits/ExpandBits ------------------------ 6291 6292 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6293 predicate(n->bottom_type()->isa_int()); 6294 match(Set dst (CompressBits src mask)); 6295 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6296 ins_encode %{ 6297 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6298 %} 6299 ins_pipe( pipe_slow ); 6300 %} 6301 6302 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6303 predicate(n->bottom_type()->isa_int()); 6304 match(Set dst (ExpandBits src mask)); 6305 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6306 ins_encode %{ 6307 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6308 %} 6309 ins_pipe( pipe_slow ); 6310 %} 6311 6312 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6313 predicate(n->bottom_type()->isa_int()); 6314 match(Set dst (CompressBits src (LoadI mask))); 6315 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6316 ins_encode %{ 6317 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6318 %} 6319 ins_pipe( pipe_slow ); 6320 %} 6321 6322 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6323 predicate(n->bottom_type()->isa_int()); 6324 match(Set dst (ExpandBits src (LoadI mask))); 6325 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6326 ins_encode %{ 6327 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6328 %} 6329 ins_pipe( pipe_slow ); 6330 %} 6331 6332 // --------------------------------- Sqrt -------------------------------------- 6333 6334 instruct vsqrtF_reg(vec dst, vec src) %{ 6335 match(Set dst (SqrtVF src)); 6336 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6337 ins_encode %{ 6338 assert(UseAVX > 0, "required"); 6339 int vlen_enc = vector_length_encoding(this); 6340 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6341 %} 6342 ins_pipe( pipe_slow ); 6343 %} 6344 6345 instruct vsqrtF_mem(vec dst, memory mem) %{ 6346 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6347 match(Set dst (SqrtVF (LoadVector mem))); 6348 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6349 ins_encode %{ 6350 assert(UseAVX > 0, "required"); 6351 int vlen_enc = vector_length_encoding(this); 6352 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6353 %} 6354 ins_pipe( pipe_slow ); 6355 %} 6356 6357 // Floating point vector sqrt 6358 instruct vsqrtD_reg(vec dst, vec src) %{ 6359 match(Set dst (SqrtVD src)); 6360 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6361 ins_encode %{ 6362 assert(UseAVX > 0, "required"); 6363 int vlen_enc = vector_length_encoding(this); 6364 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6365 %} 6366 ins_pipe( pipe_slow ); 6367 %} 6368 6369 instruct vsqrtD_mem(vec dst, memory mem) %{ 6370 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6371 match(Set dst (SqrtVD (LoadVector mem))); 6372 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6373 ins_encode %{ 6374 assert(UseAVX > 0, "required"); 6375 int vlen_enc = vector_length_encoding(this); 6376 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6377 %} 6378 ins_pipe( pipe_slow ); 6379 %} 6380 6381 // ------------------------------ Shift --------------------------------------- 6382 6383 // Left and right shift count vectors are the same on x86 6384 // (only lowest bits of xmm reg are used for count). 6385 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6386 match(Set dst (LShiftCntV cnt)); 6387 match(Set dst (RShiftCntV cnt)); 6388 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6389 ins_encode %{ 6390 __ movdl($dst$$XMMRegister, $cnt$$Register); 6391 %} 6392 ins_pipe( pipe_slow ); 6393 %} 6394 6395 // Byte vector shift 6396 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6397 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6398 match(Set dst ( LShiftVB src shift)); 6399 match(Set dst ( RShiftVB src shift)); 6400 match(Set dst (URShiftVB src shift)); 6401 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6402 format %{"vector_byte_shift $dst,$src,$shift" %} 6403 ins_encode %{ 6404 assert(UseSSE > 3, "required"); 6405 int opcode = this->ideal_Opcode(); 6406 bool sign = (opcode != Op_URShiftVB); 6407 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6408 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6409 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6410 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6411 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6412 %} 6413 ins_pipe( pipe_slow ); 6414 %} 6415 6416 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6417 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6418 UseAVX <= 1); 6419 match(Set dst ( LShiftVB src shift)); 6420 match(Set dst ( RShiftVB src shift)); 6421 match(Set dst (URShiftVB src shift)); 6422 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6423 format %{"vector_byte_shift $dst,$src,$shift" %} 6424 ins_encode %{ 6425 assert(UseSSE > 3, "required"); 6426 int opcode = this->ideal_Opcode(); 6427 bool sign = (opcode != Op_URShiftVB); 6428 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6429 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6430 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6431 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6432 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6433 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6434 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6435 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6436 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6437 %} 6438 ins_pipe( pipe_slow ); 6439 %} 6440 6441 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6442 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6443 UseAVX > 1); 6444 match(Set dst ( LShiftVB src shift)); 6445 match(Set dst ( RShiftVB src shift)); 6446 match(Set dst (URShiftVB src shift)); 6447 effect(TEMP dst, TEMP tmp); 6448 format %{"vector_byte_shift $dst,$src,$shift" %} 6449 ins_encode %{ 6450 int opcode = this->ideal_Opcode(); 6451 bool sign = (opcode != Op_URShiftVB); 6452 int vlen_enc = Assembler::AVX_256bit; 6453 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6454 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6455 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6456 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6457 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6458 %} 6459 ins_pipe( pipe_slow ); 6460 %} 6461 6462 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6463 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6464 match(Set dst ( LShiftVB src shift)); 6465 match(Set dst ( RShiftVB src shift)); 6466 match(Set dst (URShiftVB src shift)); 6467 effect(TEMP 
dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// A logical right shift of a short vector would produce an incorrect Java result for negative
// data, because Java code converts short values to int with sign extension before shifting.
// Char vectors are fine since chars are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! 
shift packedS" %} 6527 ins_encode %{ 6528 int opcode = this->ideal_Opcode(); 6529 if (UseAVX > 0) { 6530 int vlen_enc = vector_length_encoding(this); 6531 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6532 } else { 6533 int vlen = Matcher::vector_length(this); 6534 if (vlen == 2) { 6535 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6536 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6537 } else if (vlen == 4) { 6538 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6539 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6540 } else { 6541 assert (vlen == 8, "sanity"); 6542 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6543 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6544 } 6545 } 6546 %} 6547 ins_pipe( pipe_slow ); 6548 %} 6549 6550 // Integers vector left shift 6551 instruct vshiftI(vec dst, vec src, vec shift) %{ 6552 predicate(!n->as_ShiftV()->is_var_shift()); 6553 match(Set dst ( LShiftVI src shift)); 6554 match(Set dst ( RShiftVI src shift)); 6555 match(Set dst (URShiftVI src shift)); 6556 effect(TEMP dst, USE src, USE shift); 6557 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6558 ins_encode %{ 6559 int opcode = this->ideal_Opcode(); 6560 if (UseAVX > 0) { 6561 int vlen_enc = vector_length_encoding(this); 6562 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6563 } else { 6564 int vlen = Matcher::vector_length(this); 6565 if (vlen == 2) { 6566 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6567 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6568 } else { 6569 assert(vlen == 4, "sanity"); 6570 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6571 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6572 } 6573 } 6574 %} 6575 ins_pipe( pipe_slow ); 6576 %} 6577 6578 // Integers vector left constant shift 6579 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6580 match(Set dst (LShiftVI src (LShiftCntV shift))); 6581 match(Set dst (RShiftVI src (RShiftCntV shift))); 6582 match(Set dst (URShiftVI src (RShiftCntV shift))); 6583 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6584 ins_encode %{ 6585 int opcode = this->ideal_Opcode(); 6586 if (UseAVX > 0) { 6587 int vector_len = vector_length_encoding(this); 6588 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6589 } else { 6590 int vlen = Matcher::vector_length(this); 6591 if (vlen == 2) { 6592 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6593 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6594 } else { 6595 assert(vlen == 4, "sanity"); 6596 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6597 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6598 } 6599 } 6600 %} 6601 ins_pipe( pipe_slow ); 6602 %} 6603 6604 // Longs vector shift 6605 instruct vshiftL(vec dst, vec src, vec shift) %{ 6606 predicate(!n->as_ShiftV()->is_var_shift()); 6607 match(Set dst ( LShiftVL src shift)); 6608 match(Set dst (URShiftVL src shift)); 6609 effect(TEMP dst, USE src, USE shift); 6610 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6611 ins_encode %{ 6612 int opcode = this->ideal_Opcode(); 6613 if (UseAVX > 0) { 6614 int vlen_enc = vector_length_encoding(this); 6615 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6616 } else { 6617 assert(Matcher::vector_length(this) == 2, ""); 6618 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6619 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6620 } 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 6624 6625 // Longs vector constant shift 6626 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6627 match(Set dst (LShiftVL src (LShiftCntV shift))); 6628 match(Set dst (URShiftVL src (RShiftCntV shift))); 6629 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6630 ins_encode %{ 6631 int opcode = this->ideal_Opcode(); 6632 if (UseAVX > 0) { 6633 int vector_len = vector_length_encoding(this); 6634 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6635 } else { 6636 assert(Matcher::vector_length(this) == 2, ""); 6637 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6638 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6639 } 6640 %} 6641 ins_pipe( pipe_slow ); 6642 %} 6643 6644 // -------------------ArithmeticRightShift ----------------------------------- 6645 // Long vector arithmetic right shift 6646 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6647 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6648 match(Set dst (RShiftVL src shift)); 6649 effect(TEMP dst, TEMP tmp); 6650 format %{ "vshiftq $dst,$src,$shift" %} 6651 ins_encode %{ 6652 uint vlen = Matcher::vector_length(this); 6653 if (vlen == 2) { 6654 assert(UseSSE >= 2, "required"); 6655 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6656 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6657 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6658 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6659 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6660 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6661 } else { 6662 assert(vlen == 4, "sanity"); 6663 assert(UseAVX > 1, "required"); 6664 int vlen_enc = Assembler::AVX_256bit; 6665 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6666 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6667 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6668 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6669 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6670 } 6671 %} 6672 ins_pipe( pipe_slow ); 6673 %} 6674 6675 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6676 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6677 match(Set dst (RShiftVL src shift)); 6678 format %{ "vshiftq $dst,$src,$shift" %} 6679 ins_encode %{ 6680 int vlen_enc = vector_length_encoding(this); 6681 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6682 %} 6683 ins_pipe( pipe_slow ); 6684 %} 6685 6686 // ------------------- Variable Shift ----------------------------- 6687 // Byte variable shift 6688 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6689 predicate(Matcher::vector_length(n) <= 8 && 6690 n->as_ShiftV()->is_var_shift() && 6691 !VM_Version::supports_avx512bw()); 6692 match(Set dst ( LShiftVB src shift)); 6693 match(Set dst ( RShiftVB src shift)); 6694 match(Set dst (URShiftVB src shift)); 
6695 effect(TEMP dst, TEMP vtmp); 6696 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6697 ins_encode %{ 6698 assert(UseAVX >= 2, "required"); 6699 6700 int opcode = this->ideal_Opcode(); 6701 int vlen_enc = Assembler::AVX_128bit; 6702 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6703 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6704 %} 6705 ins_pipe( pipe_slow ); 6706 %} 6707 6708 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6709 predicate(Matcher::vector_length(n) == 16 && 6710 n->as_ShiftV()->is_var_shift() && 6711 !VM_Version::supports_avx512bw()); 6712 match(Set dst ( LShiftVB src shift)); 6713 match(Set dst ( RShiftVB src shift)); 6714 match(Set dst (URShiftVB src shift)); 6715 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6716 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6717 ins_encode %{ 6718 assert(UseAVX >= 2, "required"); 6719 6720 int opcode = this->ideal_Opcode(); 6721 int vlen_enc = Assembler::AVX_128bit; 6722 // Shift lower half and get word result in dst 6723 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6724 6725 // Shift upper half and get word result in vtmp1 6726 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6727 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6728 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6729 6730 // Merge and down convert the two word results to byte in dst 6731 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6732 %} 6733 ins_pipe( pipe_slow ); 6734 %} 6735 6736 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 6737 predicate(Matcher::vector_length(n) == 32 && 6738 n->as_ShiftV()->is_var_shift() && 6739 !VM_Version::supports_avx512bw()); 6740 match(Set dst ( LShiftVB src shift)); 6741 match(Set dst ( RShiftVB src shift)); 6742 match(Set dst (URShiftVB src shift)); 6743 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 6744 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 6745 ins_encode %{ 6746 assert(UseAVX >= 2, "required"); 6747 6748 int opcode = this->ideal_Opcode(); 6749 int vlen_enc = Assembler::AVX_128bit; 6750 // Process lower 128 bits and get result in dst 6751 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6752 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6753 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6754 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6755 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6756 6757 // Process higher 128 bits and get result in vtmp3 6758 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6759 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6760 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 6761 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6762 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6763 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6764 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6765 6766 // Merge the two results in dst 6767 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6768 %} 6769 ins_pipe( pipe_slow ); 6770 %} 6771 6772 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 6773 predicate(Matcher::vector_length(n) <= 32 && 6774 n->as_ShiftV()->is_var_shift() && 6775 VM_Version::supports_avx512bw()); 6776 match(Set dst ( LShiftVB src shift)); 6777 match(Set dst ( RShiftVB src shift)); 6778 match(Set dst (URShiftVB src shift)); 6779 effect(TEMP dst, TEMP vtmp); 6780 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6781 ins_encode %{ 6782 assert(UseAVX > 2, "required"); 6783 6784 int opcode = this->ideal_Opcode(); 6785 int vlen_enc = vector_length_encoding(this); 6786 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6787 %} 6788 ins_pipe( pipe_slow ); 6789 %} 6790 6791 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6792 predicate(Matcher::vector_length(n) == 64 && 6793 n->as_ShiftV()->is_var_shift() && 6794 VM_Version::supports_avx512bw()); 6795 match(Set dst ( LShiftVB src shift)); 6796 match(Set dst ( RShiftVB src shift)); 6797 match(Set dst (URShiftVB src shift)); 6798 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6799 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6800 ins_encode %{ 6801 assert(UseAVX > 2, "required"); 6802 6803 int opcode = this->ideal_Opcode(); 6804 int vlen_enc = Assembler::AVX_256bit; 6805 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6806 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6807 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6808 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6809 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6810 %} 6811 ins_pipe( pipe_slow ); 6812 %} 6813 6814 // Short variable shift 6815 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6816 predicate(Matcher::vector_length(n) <= 8 && 6817 n->as_ShiftV()->is_var_shift() && 6818 !VM_Version::supports_avx512bw()); 6819 match(Set dst ( LShiftVS src shift)); 6820 match(Set dst ( RShiftVS src shift)); 6821 match(Set dst (URShiftVS src shift)); 6822 effect(TEMP dst, TEMP vtmp); 6823 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6824 ins_encode %{ 6825 assert(UseAVX >= 2, "required"); 6826 6827 int opcode = this->ideal_Opcode(); 6828 bool sign = (opcode != Op_URShiftVS); 6829 int vlen_enc = Assembler::AVX_256bit; 6830 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6831 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6832 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6833 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6834 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6835 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6836 %} 6837 ins_pipe( pipe_slow ); 6838 %} 6839 6840 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6841 predicate(Matcher::vector_length(n) == 16 && 
6842 n->as_ShiftV()->is_var_shift() && 6843 !VM_Version::supports_avx512bw()); 6844 match(Set dst ( LShiftVS src shift)); 6845 match(Set dst ( RShiftVS src shift)); 6846 match(Set dst (URShiftVS src shift)); 6847 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6848 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6849 ins_encode %{ 6850 assert(UseAVX >= 2, "required"); 6851 6852 int opcode = this->ideal_Opcode(); 6853 bool sign = (opcode != Op_URShiftVS); 6854 int vlen_enc = Assembler::AVX_256bit; 6855 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 6856 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6857 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6858 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6859 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6860 6861 // Shift upper half, with result in dst using vtmp1 as TEMP 6862 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6863 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6864 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6865 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6866 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6867 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6868 6869 // Merge lower and upper half result into dst 6870 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6871 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6872 %} 6873 ins_pipe( pipe_slow ); 6874 %} 6875 6876 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6877 predicate(n->as_ShiftV()->is_var_shift() && 6878 VM_Version::supports_avx512bw()); 6879 match(Set dst ( LShiftVS src shift)); 6880 match(Set dst ( RShiftVS src shift)); 6881 match(Set dst (URShiftVS src shift)); 6882 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6883 ins_encode %{ 6884 assert(UseAVX > 2, "required"); 6885 6886 int opcode = this->ideal_Opcode(); 6887 int vlen_enc = vector_length_encoding(this); 6888 if (!VM_Version::supports_avx512vl()) { 6889 vlen_enc = Assembler::AVX_512bit; 6890 } 6891 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6892 %} 6893 ins_pipe( pipe_slow ); 6894 %} 6895 6896 //Integer variable shift 6897 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6898 predicate(n->as_ShiftV()->is_var_shift()); 6899 match(Set dst ( LShiftVI src shift)); 6900 match(Set dst ( RShiftVI src shift)); 6901 match(Set dst (URShiftVI src shift)); 6902 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6903 ins_encode %{ 6904 assert(UseAVX >= 2, "required"); 6905 6906 int opcode = this->ideal_Opcode(); 6907 int vlen_enc = vector_length_encoding(this); 6908 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6909 %} 6910 ins_pipe( pipe_slow ); 6911 %} 6912 6913 //Long variable shift 6914 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6915 predicate(n->as_ShiftV()->is_var_shift()); 6916 match(Set dst ( LShiftVL src shift)); 6917 match(Set dst (URShiftVL src shift)); 6918 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 6919 ins_encode %{ 6920 assert(UseAVX >= 2, "required"); 6921 6922 int opcode = this->ideal_Opcode(); 6923 int vlen_enc = vector_length_encoding(this); 6924 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6925 %} 6926 ins_pipe( pipe_slow ); 6927 %} 6928 6929 //Long variable right shift arithmetic 6930 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6931 predicate(Matcher::vector_length(n) <= 4 && 6932 n->as_ShiftV()->is_var_shift() && 6933 UseAVX == 2); 6934 match(Set dst (RShiftVL src shift)); 6935 effect(TEMP dst, TEMP vtmp); 6936 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 6937 ins_encode %{ 6938 int opcode = this->ideal_Opcode(); 6939 int vlen_enc = vector_length_encoding(this); 6940 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6941 $vtmp$$XMMRegister); 6942 %} 6943 ins_pipe( pipe_slow ); 6944 %} 6945 6946 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6947 predicate(n->as_ShiftV()->is_var_shift() && 6948 UseAVX > 2); 6949 match(Set dst (RShiftVL src shift)); 6950 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6951 ins_encode %{ 6952 int opcode = this->ideal_Opcode(); 6953 int vlen_enc = vector_length_encoding(this); 6954 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 // --------------------------------- AND -------------------------------------- 6960 6961 instruct vand(vec dst, vec src) %{ 6962 predicate(UseAVX == 0); 6963 match(Set dst (AndV dst src)); 6964 format %{ "pand $dst,$src\t! and vectors" %} 6965 ins_encode %{ 6966 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6967 %} 6968 ins_pipe( pipe_slow ); 6969 %} 6970 6971 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6972 predicate(UseAVX > 0); 6973 match(Set dst (AndV src1 src2)); 6974 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6975 ins_encode %{ 6976 int vlen_enc = vector_length_encoding(this); 6977 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6978 %} 6979 ins_pipe( pipe_slow ); 6980 %} 6981 6982 instruct vand_mem(vec dst, vec src, memory mem) %{ 6983 predicate((UseAVX > 0) && 6984 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6985 match(Set dst (AndV src (LoadVector mem))); 6986 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6987 ins_encode %{ 6988 int vlen_enc = vector_length_encoding(this); 6989 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6990 %} 6991 ins_pipe( pipe_slow ); 6992 %} 6993 6994 // --------------------------------- OR --------------------------------------- 6995 6996 instruct vor(vec dst, vec src) %{ 6997 predicate(UseAVX == 0); 6998 match(Set dst (OrV dst src)); 6999 format %{ "por $dst,$src\t! or vectors" %} 7000 ins_encode %{ 7001 __ por($dst$$XMMRegister, $src$$XMMRegister); 7002 %} 7003 ins_pipe( pipe_slow ); 7004 %} 7005 7006 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7007 predicate(UseAVX > 0); 7008 match(Set dst (OrV src1 src2)); 7009 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 7010 ins_encode %{ 7011 int vlen_enc = vector_length_encoding(this); 7012 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7013 %} 7014 ins_pipe( pipe_slow ); 7015 %} 7016 7017 instruct vor_mem(vec dst, vec src, memory mem) %{ 7018 predicate((UseAVX > 0) && 7019 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7020 match(Set dst (OrV src (LoadVector mem))); 7021 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7022 ins_encode %{ 7023 int vlen_enc = vector_length_encoding(this); 7024 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7025 %} 7026 ins_pipe( pipe_slow ); 7027 %} 7028 7029 // --------------------------------- XOR -------------------------------------- 7030 7031 instruct vxor(vec dst, vec src) %{ 7032 predicate(UseAVX == 0); 7033 match(Set dst (XorV dst src)); 7034 format %{ "pxor $dst,$src\t! xor vectors" %} 7035 ins_encode %{ 7036 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7037 %} 7038 ins_pipe( pipe_slow ); 7039 %} 7040 7041 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7042 predicate(UseAVX > 0); 7043 match(Set dst (XorV src1 src2)); 7044 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7045 ins_encode %{ 7046 int vlen_enc = vector_length_encoding(this); 7047 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7048 %} 7049 ins_pipe( pipe_slow ); 7050 %} 7051 7052 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7053 predicate((UseAVX > 0) && 7054 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7055 match(Set dst (XorV src (LoadVector mem))); 7056 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7057 ins_encode %{ 7058 int vlen_enc = vector_length_encoding(this); 7059 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7060 %} 7061 ins_pipe( pipe_slow ); 7062 %} 7063 7064 // --------------------------------- VectorCast -------------------------------------- 7065 7066 instruct vcastBtoX(vec dst, vec src) %{ 7067 match(Set dst (VectorCastB2X src)); 7068 format %{ "vector_cast_b2x $dst,$src\t!" %} 7069 ins_encode %{ 7070 assert(UseAVX > 0, "required"); 7071 7072 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7073 int vlen_enc = vector_length_encoding(this); 7074 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7075 %} 7076 ins_pipe( pipe_slow ); 7077 %} 7078 7079 instruct castStoX(vec dst, vec src) %{ 7080 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7081 Matcher::vector_length(n->in(1)) <= 8 && // src 7082 Matcher::vector_element_basic_type(n) == T_BYTE); 7083 match(Set dst (VectorCastS2X src)); 7084 format %{ "vector_cast_s2x $dst,$src" %} 7085 ins_encode %{ 7086 assert(UseAVX > 0, "required"); 7087 7088 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7089 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7090 %} 7091 ins_pipe( pipe_slow ); 7092 %} 7093 7094 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7095 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7096 Matcher::vector_length(n->in(1)) == 16 && // src 7097 Matcher::vector_element_basic_type(n) == T_BYTE); 7098 effect(TEMP dst, TEMP vtmp); 7099 match(Set dst (VectorCastS2X src)); 7100 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7101 ins_encode %{ 7102 assert(UseAVX > 0, "required"); 7103 7104 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7105 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7106 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7107 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7108 %} 7109 ins_pipe( pipe_slow ); 7110 %} 7111 7112 instruct vcastStoX_evex(vec dst, vec src) %{ 7113 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7114 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7115 match(Set dst (VectorCastS2X src)); 7116 format %{ "vector_cast_s2x $dst,$src\t!" %} 7117 ins_encode %{ 7118 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7119 int src_vlen_enc = vector_length_encoding(this, $src); 7120 int vlen_enc = vector_length_encoding(this); 7121 switch (to_elem_bt) { 7122 case T_BYTE: 7123 if (!VM_Version::supports_avx512vl()) { 7124 vlen_enc = Assembler::AVX_512bit; 7125 } 7126 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7127 break; 7128 case T_INT: 7129 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7130 break; 7131 case T_FLOAT: 7132 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7133 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7134 break; 7135 case T_LONG: 7136 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7137 break; 7138 case T_DOUBLE: { 7139 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7140 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7141 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7142 break; 7143 } 7144 default: 7145 ShouldNotReachHere(); 7146 } 7147 %} 7148 ins_pipe( pipe_slow ); 7149 %} 7150 7151 instruct castItoX(vec dst, vec src) %{ 7152 predicate(UseAVX <= 2 && 7153 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7154 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7155 match(Set dst (VectorCastI2X src)); 7156 format %{ "vector_cast_i2x $dst,$src" %} 7157 ins_encode %{ 7158 assert(UseAVX > 0, "required"); 7159 7160 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7161 int vlen_enc = vector_length_encoding(this, $src); 7162 7163 if (to_elem_bt == T_BYTE) { 7164 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7165 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7166 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7167 } else { 7168 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7169 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7170 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7171 } 7172 %} 7173 ins_pipe( pipe_slow ); 7174 %} 7175 7176 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7177 predicate(UseAVX <= 2 && 7178 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7179 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7180 match(Set dst (VectorCastI2X src)); 7181 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7182 effect(TEMP dst, TEMP vtmp); 7183 ins_encode %{ 7184 assert(UseAVX > 0, "required"); 7185 7186 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7187 int vlen_enc = vector_length_encoding(this, $src); 7188 7189 if (to_elem_bt == T_BYTE) { 7190 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7191 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7192 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7193 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7194 } else { 7195 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7196 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7197 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7198 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7199 } 7200 %} 7201 ins_pipe( pipe_slow ); 7202 %} 7203 7204 instruct vcastItoX_evex(vec dst, vec src) %{ 7205 predicate(UseAVX > 2 || 7206 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7207 match(Set dst (VectorCastI2X src)); 7208 format %{ "vector_cast_i2x $dst,$src\t!" %} 7209 ins_encode %{ 7210 assert(UseAVX > 0, "required"); 7211 7212 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7213 int src_vlen_enc = vector_length_encoding(this, $src); 7214 int dst_vlen_enc = vector_length_encoding(this); 7215 switch (dst_elem_bt) { 7216 case T_BYTE: 7217 if (!VM_Version::supports_avx512vl()) { 7218 src_vlen_enc = Assembler::AVX_512bit; 7219 } 7220 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7221 break; 7222 case T_SHORT: 7223 if (!VM_Version::supports_avx512vl()) { 7224 src_vlen_enc = Assembler::AVX_512bit; 7225 } 7226 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7227 break; 7228 case T_FLOAT: 7229 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7230 break; 7231 case T_LONG: 7232 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7233 break; 7234 case T_DOUBLE: 7235 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7236 break; 7237 default: 7238 ShouldNotReachHere(); 7239 } 7240 %} 7241 ins_pipe( pipe_slow ); 7242 %} 7243 7244 instruct vcastLtoBS(vec dst, vec src) %{ 7245 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7246 UseAVX <= 2); 7247 match(Set dst (VectorCastL2X src)); 7248 format %{ "vector_cast_l2x $dst,$src" %} 7249 ins_encode %{ 7250 assert(UseAVX > 0, "required"); 7251 7252 int vlen = Matcher::vector_length_in_bytes(this, $src); 7253 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7254 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7255 : ExternalAddress(vector_int_to_short_mask()); 7256 if (vlen <= 16) { 7257 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7258 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7259 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7260 } else { 7261 assert(vlen <= 32, "required"); 7262 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7263 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7264 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7265 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7266 } 7267 if (to_elem_bt == T_BYTE) { 7268 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7269 } 7270 %} 7271 ins_pipe( pipe_slow ); 7272 %} 7273 7274 instruct vcastLtoX_evex(vec dst, vec src) %{ 7275 predicate(UseAVX > 2 || 7276 (Matcher::vector_element_basic_type(n) == T_INT || 7277 Matcher::vector_element_basic_type(n) == T_FLOAT || 7278 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7279 match(Set dst (VectorCastL2X src)); 7280 format %{ "vector_cast_l2x $dst,$src\t!" %} 7281 ins_encode %{ 7282 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7283 int vlen = Matcher::vector_length_in_bytes(this, $src); 7284 int vlen_enc = vector_length_encoding(this, $src); 7285 switch (to_elem_bt) { 7286 case T_BYTE: 7287 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7288 vlen_enc = Assembler::AVX_512bit; 7289 } 7290 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7291 break; 7292 case T_SHORT: 7293 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7294 vlen_enc = Assembler::AVX_512bit; 7295 } 7296 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7297 break; 7298 case T_INT: 7299 if (vlen == 8) { 7300 if ($dst$$XMMRegister != $src$$XMMRegister) { 7301 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7302 } 7303 } else if (vlen == 16) { 7304 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7305 } else if (vlen == 32) { 7306 if (UseAVX > 2) { 7307 if (!VM_Version::supports_avx512vl()) { 7308 vlen_enc = Assembler::AVX_512bit; 7309 } 7310 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7311 } else { 7312 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7313 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7314 } 7315 } else { // vlen == 64 7316 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7317 } 7318 break; 7319 case T_FLOAT: 7320 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7321 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7322 break; 7323 case T_DOUBLE: 7324 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7325 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7326 break; 7327 7328 default: assert(false, "%s", type2name(to_elem_bt)); 7329 } 7330 %} 7331 ins_pipe( pipe_slow ); 7332 %} 7333 7334 instruct vcastFtoD_reg(vec dst, vec src) %{ 7335 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7336 match(Set dst (VectorCastF2X src)); 7337 format %{ "vector_cast_f2d $dst,$src\t!" 
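%}
  // F2D is a widening cast: there are half as many double lanes as float lanes, so vcvtps2pd
  // reads only the low half of the $src register while vector_length_encoding(this) is sized
  // for the wider destination vector.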
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register to load addresses wider than
    // 32 bits for register-indirect addressing, since stub constants live in the code cache and
    // ReservedCodeCacheSize is currently capped at 2G. Targets are free to raise that limit, but
    // a code cache larger than 2G is unrealistic in practice; with the current cap we save a
    // temporary register allocation, which in the limiting case can avoid spilling in blocks
    // with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7396 ins_encode %{ 7397 int vlen_enc = vector_length_encoding(this, $src); 7398 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7399 %} 7400 ins_pipe( pipe_slow ); 7401 %} 7402 7403 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7404 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7405 is_integral_type(Matcher::vector_element_basic_type(n))); 7406 match(Set dst (VectorCastD2X src)); 7407 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7408 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7409 ins_encode %{ 7410 int vlen_enc = vector_length_encoding(this, $src); 7411 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7412 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7413 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7414 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7415 %} 7416 ins_pipe( pipe_slow ); 7417 %} 7418 7419 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7420 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7421 is_integral_type(Matcher::vector_element_basic_type(n))); 7422 match(Set dst (VectorCastD2X src)); 7423 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7424 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7425 ins_encode %{ 7426 int vlen_enc = vector_length_encoding(this, $src); 7427 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7428 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7429 ExternalAddress(vector_float_signflip()); 7430 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7431 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7432 %} 7433 ins_pipe( pipe_slow ); 7434 %} 7435 7436 instruct vucast(vec dst, vec src) %{ 7437 match(Set dst (VectorUCastB2X src)); 7438 match(Set dst (VectorUCastS2X src)); 7439 match(Set dst (VectorUCastI2X src)); 7440 format %{ "vector_ucast $dst,$src\t!" %} 7441 ins_encode %{ 7442 assert(UseAVX > 0, "required"); 7443 7444 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7445 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7446 int vlen_enc = vector_length_encoding(this); 7447 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7448 %} 7449 ins_pipe( pipe_slow ); 7450 %} 7451 7452 #ifdef _LP64 7453 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7454 predicate(!VM_Version::supports_avx512vl() && 7455 Matcher::vector_length_in_bytes(n) < 64 && 7456 Matcher::vector_element_basic_type(n) == T_INT); 7457 match(Set dst (RoundVF src)); 7458 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7459 format %{ "vector_round_float $dst,$src\t! 
using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7460 ins_encode %{ 7461 int vlen_enc = vector_length_encoding(this); 7462 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7463 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7464 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7465 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7466 %} 7467 ins_pipe( pipe_slow ); 7468 %} 7469 7470 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7471 predicate((VM_Version::supports_avx512vl() || 7472 Matcher::vector_length_in_bytes(n) == 64) && 7473 Matcher::vector_element_basic_type(n) == T_INT); 7474 match(Set dst (RoundVF src)); 7475 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7476 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7477 ins_encode %{ 7478 int vlen_enc = vector_length_encoding(this); 7479 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7480 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7481 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7482 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7483 %} 7484 ins_pipe( pipe_slow ); 7485 %} 7486 7487 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7488 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7489 match(Set dst (RoundVD src)); 7490 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7491 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7492 ins_encode %{ 7493 int vlen_enc = vector_length_encoding(this); 7494 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7495 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7496 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7497 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7498 %} 7499 ins_pipe( pipe_slow ); 7500 %} 7501 7502 #endif // _LP64 7503 7504 // --------------------------------- VectorMaskCmp -------------------------------------- 7505 7506 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7507 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7508 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7509 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7510 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7511 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7512 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7513 ins_encode %{ 7514 int vlen_enc = vector_length_encoding(this, $src1); 7515 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7516 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7517 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7518 } else { 7519 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7520 } 7521 %} 7522 ins_pipe( pipe_slow ); 7523 %} 7524 7525 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7526 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7527 n->bottom_type()->isa_vectmask() == nullptr && 7528 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7529 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7530 effect(TEMP ktmp); 7531 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7532 ins_encode %{ 7533 int vlen_enc = Assembler::AVX_512bit; 7534 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7535 KRegister mask = k0; // The comparison itself is not being masked. 7536 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7537 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7538 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7539 } else { 7540 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7541 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7542 } 7543 %} 7544 ins_pipe( pipe_slow ); 7545 %} 7546 7547 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7548 predicate(n->bottom_type()->isa_vectmask() && 7549 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7550 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7551 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7552 ins_encode %{ 7553 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7554 int vlen_enc = vector_length_encoding(this, $src1); 7555 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7556 KRegister mask = k0; // The comparison itself is not being masked. 7557 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7558 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7559 } else { 7560 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7561 } 7562 %} 7563 ins_pipe( pipe_slow ); 7564 %} 7565 7566 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7567 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7568 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7569 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7570 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7571 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7572 (n->in(2)->get_int() == BoolTest::eq || 7573 n->in(2)->get_int() == BoolTest::lt || 7574 n->in(2)->get_int() == BoolTest::gt)); // cond 7575 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7576 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7577 ins_encode %{ 7578 int vlen_enc = vector_length_encoding(this, $src1); 7579 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7580 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7581 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7582 %} 7583 ins_pipe( pipe_slow ); 7584 %} 7585 7586 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7587 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7588 !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7589 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7590 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7591 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7592 (n->in(2)->get_int() == BoolTest::ne || 7593 n->in(2)->get_int() == BoolTest::le || 7594 n->in(2)->get_int() == BoolTest::ge)); // cond 7595 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7596 effect(TEMP dst, TEMP xtmp); 7597 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7598 ins_encode %{ 7599 int vlen_enc = vector_length_encoding(this, $src1); 7600 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7601 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7602 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7603 %} 7604 ins_pipe( pipe_slow ); 7605 %} 7606 7607 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7608 predicate(n->bottom_type()->isa_vectmask() == nullptr && 7609 Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) && 7610 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7611 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7612 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7613 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7614 effect(TEMP dst, TEMP xtmp); 7615 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7616 ins_encode %{ 7617 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7618 int vlen_enc = vector_length_encoding(this, $src1); 7619 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7620 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7621 7622 if (vlen_enc == Assembler::AVX_128bit) { 7623 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7624 } else { 7625 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7626 } 7627 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7628 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7629 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7630 %} 7631 ins_pipe( pipe_slow ); 7632 %} 7633 7634 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7635 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7636 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7637 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7638 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7639 effect(TEMP ktmp); 7640 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7641 ins_encode %{ 7642 assert(UseAVX > 2, "required"); 7643 7644 int vlen_enc = vector_length_encoding(this, $src1); 7645 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7646 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7647 KRegister mask = k0; // The comparison itself is not being masked. 7648 bool merge = false; 7649 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7650 7651 switch (src1_elem_bt) { 7652 case T_INT: { 7653 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7654 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7655 break; 7656 } 7657 case T_LONG: { 7658 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7659 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7660 break; 7661 } 7662 default: assert(false, "%s", type2name(src1_elem_bt)); 7663 } 7664 %} 7665 ins_pipe( pipe_slow ); 7666 %} 7667 7668 7669 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7670 predicate(n->bottom_type()->isa_vectmask() && 7671 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7672 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7673 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare into the destination mask register, dispatching on the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 7766 ins_encode %{ 7767 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7768 7769 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7770 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7771 %} 7772 ins_pipe( pipe_slow ); 7773 %} 7774 #endif 7775 7776 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7777 predicate(Matcher::vector_length(n->in(1)) <= 4); 7778 match(Set dst (ExtractF src idx)); 7779 effect(TEMP dst, TEMP vtmp); 7780 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7781 ins_encode %{ 7782 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7783 7784 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 7785 %} 7786 ins_pipe( pipe_slow ); 7787 %} 7788 7789 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7790 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7791 Matcher::vector_length(n->in(1)/*src*/) == 16); 7792 match(Set dst (ExtractF src idx)); 7793 effect(TEMP vtmp); 7794 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7795 ins_encode %{ 7796 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7797 7798 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7799 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 7800 %} 7801 ins_pipe( pipe_slow ); 7802 %} 7803 7804 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7805 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7806 match(Set dst (ExtractD src idx)); 7807 format %{ "extractD $dst,$src,$idx\t!" %} 7808 ins_encode %{ 7809 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7810 7811 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7812 %} 7813 ins_pipe( pipe_slow ); 7814 %} 7815 7816 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7817 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7818 Matcher::vector_length(n->in(1)) == 8); // src 7819 match(Set dst (ExtractD src idx)); 7820 effect(TEMP vtmp); 7821 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7822 ins_encode %{ 7823 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7824 7825 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7826 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7827 %} 7828 ins_pipe( pipe_slow ); 7829 %} 7830 7831 // --------------------------------- Vector Blend -------------------------------------- 7832 7833 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7834 predicate(UseAVX == 0); 7835 match(Set dst (VectorBlend (Binary dst src) mask)); 7836 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7837 effect(TEMP tmp); 7838 ins_encode %{ 7839 assert(UseSSE >= 4, "required"); 7840 7841 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7842 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7843 } 7844 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7850 predicate(UseAVX > 0 && 7851 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 7852 Matcher::vector_length_in_bytes(n) <= 32 && 7853 is_integral_type(Matcher::vector_element_basic_type(n))); 7854 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7855 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7856 ins_encode %{ 7857 int vlen_enc = vector_length_encoding(this); 7858 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7859 %} 7860 ins_pipe( pipe_slow ); 7861 %} 7862 7863 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7864 predicate(UseAVX > 0 && 7865 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 7866 Matcher::vector_length_in_bytes(n) <= 32 && 7867 !is_integral_type(Matcher::vector_element_basic_type(n))); 7868 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7869 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7870 ins_encode %{ 7871 int vlen_enc = vector_length_encoding(this); 7872 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7873 %} 7874 ins_pipe( pipe_slow ); 7875 %} 7876 7877 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 7878 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7879 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 7880 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7881 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 7882 effect(TEMP ktmp); 7883 ins_encode %{ 7884 int vlen_enc = Assembler::AVX_512bit; 7885 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7886 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 7887 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 7893 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 7894 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7895 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7896 VM_Version::supports_avx512bw())); 7897 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7898 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 7899 ins_encode %{ 7900 int vlen_enc = vector_length_encoding(this); 7901 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7902 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7903 %} 7904 ins_pipe( pipe_slow ); 7905 %} 7906 7907 // --------------------------------- ABS -------------------------------------- 7908 // a = |a| 7909 instruct vabsB_reg(vec dst, vec src) %{ 7910 match(Set dst (AbsVB src)); 7911 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7912 ins_encode %{ 7913 uint vlen = Matcher::vector_length(this); 7914 if (vlen <= 16) { 7915 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7916 } else { 7917 int vlen_enc = vector_length_encoding(this); 7918 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7919 } 7920 %} 7921 ins_pipe( pipe_slow ); 7922 %} 7923 7924 instruct vabsS_reg(vec dst, vec src) %{ 7925 match(Set dst (AbsVS src)); 7926 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7927 ins_encode %{ 7928 uint vlen = Matcher::vector_length(this); 7929 if (vlen <= 8) { 7930 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7931 } else { 7932 int vlen_enc = vector_length_encoding(this); 7933 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7934 } 7935 %} 7936 ins_pipe( pipe_slow ); 7937 %} 7938 7939 instruct vabsI_reg(vec dst, vec src) %{ 7940 match(Set dst (AbsVI src)); 7941 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7942 ins_encode %{ 7943 uint vlen = Matcher::vector_length(this); 7944 if (vlen <= 4) { 7945 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7946 } else { 7947 int vlen_enc = vector_length_encoding(this); 7948 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7949 } 7950 %} 7951 ins_pipe( pipe_slow ); 7952 %} 7953 7954 instruct vabsL_reg(vec dst, vec src) %{ 7955 match(Set dst (AbsVL src)); 7956 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7957 ins_encode %{ 7958 assert(UseAVX > 2, "required"); 7959 int vlen_enc = vector_length_encoding(this); 7960 if (!VM_Version::supports_avx512vl()) { 7961 vlen_enc = Assembler::AVX_512bit; 7962 } 7963 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7964 %} 7965 ins_pipe( pipe_slow ); 7966 %} 7967 7968 // --------------------------------- ABSNEG -------------------------------------- 7969 7970 instruct vabsnegF(vec dst, vec src) %{ 7971 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7972 match(Set dst (AbsVF src)); 7973 match(Set dst (NegVF src)); 7974 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7975 ins_cost(150); 7976 ins_encode %{ 7977 int opcode = this->ideal_Opcode(); 7978 int vlen = Matcher::vector_length(this); 7979 if (vlen == 2) { 7980 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 7981 } else { 7982 assert(vlen == 8 || vlen == 16, "required"); 7983 int vlen_enc = vector_length_encoding(this); 7984 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7985 } 7986 %} 7987 ins_pipe( pipe_slow ); 7988 %} 7989 7990 instruct vabsneg4F(vec dst) %{ 7991 predicate(Matcher::vector_length(n) == 4); 7992 match(Set dst (AbsVF dst)); 7993 match(Set dst (NegVF dst)); 7994 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7995 ins_cost(150); 7996 ins_encode %{ 7997 int opcode = this->ideal_Opcode(); 7998 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 7999 %} 8000 ins_pipe( pipe_slow ); 8001 %} 8002 8003 instruct vabsnegD(vec dst, vec src) %{ 8004 
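  // Packed-double abs/neg: the encoding clears or flips the sign bit of each lane with a
  // packed mask constant; ideal_Opcode() (AbsVD vs. NegVD) selects which operation vabsnegd emits.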
match(Set dst (AbsVD src)); 8005 match(Set dst (NegVD src)); 8006 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8007 ins_encode %{ 8008 int opcode = this->ideal_Opcode(); 8009 uint vlen = Matcher::vector_length(this); 8010 if (vlen == 2) { 8011 assert(UseSSE >= 2, "required"); 8012 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8013 } else { 8014 int vlen_enc = vector_length_encoding(this); 8015 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8016 } 8017 %} 8018 ins_pipe( pipe_slow ); 8019 %} 8020 8021 //------------------------------------- VectorTest -------------------------------------------- 8022 8023 #ifdef _LP64 8024 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8025 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8026 match(Set cr (VectorTest src1 src2)); 8027 effect(TEMP vtmp); 8028 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8029 ins_encode %{ 8030 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8031 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8032 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8033 %} 8034 ins_pipe( pipe_slow ); 8035 %} 8036 8037 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8038 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8039 match(Set cr (VectorTest src1 src2)); 8040 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8041 ins_encode %{ 8042 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8043 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8044 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8045 %} 8046 ins_pipe( pipe_slow ); 8047 %} 8048 8049 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8050 predicate((Matcher::vector_length(n->in(1)) < 8 || 8051 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8052 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8053 match(Set cr (VectorTest src1 src2)); 8054 effect(TEMP tmp); 8055 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8056 ins_encode %{ 8057 uint masklen = Matcher::vector_length(this, $src1); 8058 __ kmovwl($tmp$$Register, $src1$$KRegister); 8059 __ andl($tmp$$Register, (1 << masklen) - 1); 8060 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8061 %} 8062 ins_pipe( pipe_slow ); 8063 %} 8064 8065 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8066 predicate((Matcher::vector_length(n->in(1)) < 8 || 8067 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8068 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8069 match(Set cr (VectorTest src1 src2)); 8070 effect(TEMP tmp); 8071 format %{ "ktest_anytrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8072 ins_encode %{ 8073 uint masklen = Matcher::vector_length(this, $src1); 8074 __ kmovwl($tmp$$Register, $src1$$KRegister); 8075 __ andl($tmp$$Register, (1 << masklen) - 1); 8076 %} 8077 ins_pipe( pipe_slow ); 8078 %} 8079 8080 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8081 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8082 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8083 match(Set cr (VectorTest src1 src2)); 8084 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8085 ins_encode %{ 8086 uint masklen = Matcher::vector_length(this, $src1); 8087 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8088 %} 8089 ins_pipe( pipe_slow ); 8090 %} 8091 #endif 8092 8093 //------------------------------------- LoadMask -------------------------------------------- 8094 8095 instruct loadMask(legVec dst, legVec src) %{ 8096 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8097 match(Set dst (VectorLoadMask src)); 8098 effect(TEMP dst); 8099 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8100 ins_encode %{ 8101 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8102 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8103 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8104 %} 8105 ins_pipe( pipe_slow ); 8106 %} 8107 8108 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8109 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8110 match(Set dst (VectorLoadMask src)); 8111 effect(TEMP xtmp); 8112 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8113 ins_encode %{ 8114 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8115 true, Assembler::AVX_512bit); 8116 %} 8117 ins_pipe( pipe_slow ); 8118 %} 8119 8120 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8121 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8122 match(Set dst (VectorLoadMask src)); 8123 effect(TEMP xtmp); 8124 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8125 ins_encode %{ 8126 int vlen_enc = vector_length_encoding(in(1)); 8127 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8128 false, vlen_enc); 8129 %} 8130 ins_pipe( pipe_slow ); 8131 %} 8132 8133 //------------------------------------- StoreMask -------------------------------------------- 8134 8135 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8136 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8137 match(Set dst (VectorStoreMask src size)); 8138 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8139 ins_encode %{ 8140 int vlen = Matcher::vector_length(this); 8141 if (vlen <= 16 && UseAVX <= 2) { 8142 assert(UseSSE >= 3, "required"); 8143 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8144 } else { 8145 assert(UseAVX > 0, "required"); 8146 int src_vlen_enc = vector_length_encoding(this, $src); 8147 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8148 } 8149 %} 8150 ins_pipe( pipe_slow ); 8151 %} 8152 8153 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8154 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8155 match(Set dst (VectorStoreMask src size)); 8156 effect(TEMP_DEF dst, TEMP xtmp); 8157 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8158 ins_encode %{ 8159 int vlen_enc = Assembler::AVX_128bit; 8160 int vlen = Matcher::vector_length(this); 8161 if (vlen <= 8) { 8162 assert(UseSSE >= 3, "required"); 8163 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8164 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8165 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8166 } else { 8167 assert(UseAVX > 0, "required"); 8168 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8169 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8170 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8171 } 8172 %} 8173 ins_pipe( pipe_slow ); 8174 %} 8175 8176 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8177 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8178 match(Set dst (VectorStoreMask src size)); 8179 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8180 effect(TEMP_DEF dst, TEMP xtmp); 8181 ins_encode %{ 8182 int vlen_enc = Assembler::AVX_128bit; 8183 int vlen = Matcher::vector_length(this); 8184 if (vlen <= 4) { 8185 assert(UseSSE >= 3, "required"); 8186 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8187 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8188 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8189 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8190 } else { 8191 assert(UseAVX > 0, "required"); 8192 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8193 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8194 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8195 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8196 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8197 } 8198 %} 8199 ins_pipe( pipe_slow ); 8200 %} 8201 8202 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8203 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8204 match(Set dst (VectorStoreMask src size)); 8205 effect(TEMP_DEF dst, TEMP xtmp); 8206 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8207 ins_encode %{ 8208 assert(UseSSE >= 3, "required"); 8209 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8210 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8211 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8212 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8213 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8214 %} 8215 ins_pipe( pipe_slow ); 8216 %} 8217 8218 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8219 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8220 match(Set dst (VectorStoreMask src size)); 8221 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} 8222 effect(TEMP_DEF dst, TEMP vtmp); 8223 ins_encode %{ 8224 int vlen_enc = Assembler::AVX_128bit; 8225 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8226 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8227 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8228 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8229 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8230 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8231 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8232 %} 8233 ins_pipe( pipe_slow ); 8234 %} 8235 8236 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8237 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8238 match(Set dst (VectorStoreMask src size)); 8239 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8240 ins_encode %{ 8241 int src_vlen_enc = vector_length_encoding(this, $src); 8242 int dst_vlen_enc = vector_length_encoding(this); 8243 if (!VM_Version::supports_avx512vl()) { 8244 src_vlen_enc = Assembler::AVX_512bit; 8245 } 8246 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8247 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8248 %} 8249 ins_pipe( pipe_slow ); 8250 %} 8251 8252 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8253 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8254 match(Set dst (VectorStoreMask src size)); 8255 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8256 ins_encode %{ 8257 int src_vlen_enc = vector_length_encoding(this, $src); 8258 int dst_vlen_enc = vector_length_encoding(this); 8259 if (!VM_Version::supports_avx512vl()) { 8260 src_vlen_enc = Assembler::AVX_512bit; 8261 } 8262 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8263 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8264 %} 8265 ins_pipe( pipe_slow ); 8266 %} 8267 8268 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8269 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8270 match(Set dst (VectorStoreMask mask size)); 8271 effect(TEMP_DEF dst); 8272 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8273 ins_encode %{ 8274 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8275 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8276 false, Assembler::AVX_512bit, noreg); 8277 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8278 %} 8279 ins_pipe( pipe_slow ); 8280 %} 8281 8282 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8283 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8284 match(Set dst (VectorStoreMask mask size)); 8285 effect(TEMP_DEF dst); 8286 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8287 ins_encode %{ 8288 int dst_vlen_enc = vector_length_encoding(this); 8289 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8290 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8291 %} 8292 ins_pipe( pipe_slow ); 8293 %} 8294 8295 instruct vmaskcast_evex(kReg dst) %{ 8296 match(Set dst (VectorMaskCast dst)); 8297 ins_cost(0); 8298 format %{ "vector_mask_cast $dst" %} 8299 ins_encode %{ 8300 // empty 8301 %} 8302 ins_pipe(empty); 8303 %} 8304 8305 instruct vmaskcast(vec dst) %{ 8306 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8307 match(Set dst (VectorMaskCast dst)); 8308 ins_cost(0); 8309 format %{ "vector_mask_cast $dst" %} 8310 ins_encode %{ 8311 // empty 8312 %} 8313 ins_pipe(empty); 8314 %} 8315 8316 instruct vmaskcast_avx(vec dst, vec src) %{ 8317 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8318 match(Set dst (VectorMaskCast src)); 8319 format %{ "vector_mask_cast $dst, $src" %} 8320 ins_encode %{ 8321 int vlen = Matcher::vector_length(this); 8322 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8323 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8324 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8325 %} 8326 ins_pipe(pipe_slow); 8327 %} 8328 8329 //-------------------------------- Load Iota Indices ---------------------------------- 8330 8331 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8332 match(Set dst (VectorLoadConst src)); 8333 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8334 ins_encode %{ 8335 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8336 BasicType bt = Matcher::vector_element_basic_type(this); 8337 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8338 %} 8339 ins_pipe( pipe_slow ); 8340 %} 8341 8342 #ifdef _LP64 8343 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8344 match(Set dst (PopulateIndex src1 src2)); 8345 effect(TEMP dst, TEMP vtmp); 8346 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8347 ins_encode %{ 8348 assert($src2$$constant == 1, "required"); 8349 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8350 int vlen_enc = vector_length_encoding(this); 8351 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8352 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8353 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8354 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8355 %} 8356 ins_pipe( pipe_slow ); 8357 %} 8358 8359 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8360 match(Set dst (PopulateIndex src1 src2)); 8361 effect(TEMP dst, TEMP vtmp); 8362 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8363 ins_encode %{ 8364 assert($src2$$constant == 1, "required"); 8365 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8366 int vlen_enc = vector_length_encoding(this); 8367 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8368 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8369 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8370 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8371 %} 8372 ins_pipe( pipe_slow ); 8373 %} 8374 #endif 8375 //-------------------------------- Rearrange ---------------------------------- 8376 8377 // LoadShuffle/Rearrange for Byte 8378 8379 instruct loadShuffleB(vec dst) %{ 8380 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8381 match(Set dst (VectorLoadShuffle dst)); 8382 format %{ "vector_load_shuffle $dst, $dst" %} 8383 ins_encode %{ 8384 // empty 8385 %} 8386 ins_pipe( pipe_slow ); 8387 %} 8388 8389 instruct rearrangeB(vec dst, vec shuffle) %{ 8390 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8391 Matcher::vector_length(n) < 32); 8392 match(Set dst (VectorRearrange dst shuffle)); 8393 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8394 ins_encode %{ 8395 assert(UseSSE >= 4, "required"); 8396 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8397 %} 8398 ins_pipe( pipe_slow ); 8399 %} 8400 8401 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8402 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8403 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8404 match(Set dst (VectorRearrange src shuffle)); 8405 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8406 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8407 ins_encode %{ 8408 assert(UseAVX >= 2, "required"); 8409 // Swap src into vtmp1 8410 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8411 // Shuffle swapped src to get entries from other 128 bit lane 8412 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8413 // Shuffle original src to get entries from self 128 bit lane 8414 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8415 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8416 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8417 // Perform the blend 8418 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8419 %} 8420 ins_pipe( pipe_slow ); 8421 %} 8422 8423 8424 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8425 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8426 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8427 match(Set dst (VectorRearrange src shuffle)); 8428 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8429 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8430 ins_encode %{ 8431 int vlen_enc = vector_length_encoding(this); 8432 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8433 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8434 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8435 %} 8436 ins_pipe( pipe_slow ); 8437 %} 8438 8439 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8440 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8441 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8442 match(Set dst (VectorRearrange src shuffle)); 8443 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8444 ins_encode %{ 8445 int vlen_enc = vector_length_encoding(this); 8446 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 // LoadShuffle/Rearrange for Short 8452 8453 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8454 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8455 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8456 match(Set dst (VectorLoadShuffle src)); 8457 effect(TEMP dst, TEMP vtmp); 8458 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8459 ins_encode %{ 8460 // Create a byte shuffle mask from short shuffle mask 8461 // only byte shuffle instruction available on these platforms 8462 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8463 if (UseAVX == 0) { 8464 assert(vlen_in_bytes <= 16, "required"); 8465 // Multiply each shuffle by two to get byte index 8466 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8467 __ psllw($vtmp$$XMMRegister, 1); 8468 8469 // Duplicate to create 2 copies of byte index 8470 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8471 __ psllw($dst$$XMMRegister, 8); 8472 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8473 8474 // Add one to get alternate byte index 8475 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8476 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8477 } else { 8478 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8479 int vlen_enc = vector_length_encoding(this); 8480 // Multiply each shuffle by two to get byte index 8481 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8482 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8483 8484 // Duplicate to create 2 copies of byte index 8485 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8486 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8487 8488 // Add one to get alternate byte index 8489 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8490 } 8491 %} 8492 ins_pipe( pipe_slow ); 8493 %} 8494 8495 instruct rearrangeS(vec dst, vec shuffle) %{ 8496 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8497 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8498 match(Set dst (VectorRearrange dst shuffle)); 8499 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8500 ins_encode %{ 8501 assert(UseSSE >= 4, "required"); 8502 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8503 %} 8504 ins_pipe( pipe_slow ); 8505 %} 8506 8507 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8508 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8509 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8510 match(Set dst (VectorRearrange src shuffle)); 8511 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8512 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8513 ins_encode %{ 8514 assert(UseAVX >= 2, "required"); 8515 // Swap src into vtmp1 8516 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8517 // Shuffle swapped src to get entries from other 128 bit lane 8518 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8519 // Shuffle original src to get entries from self 128 bit lane 8520 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8521 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8522 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8523 // Perform the blend 8524 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8525 %} 8526 ins_pipe( pipe_slow ); 8527 %} 8528 8529 instruct loadShuffleS_evex(vec dst, vec src) %{ 8530 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8531 VM_Version::supports_avx512bw()); 8532 match(Set dst (VectorLoadShuffle src)); 8533 format %{ "vector_load_shuffle $dst, $src" %} 8534 ins_encode %{ 8535 int vlen_enc = vector_length_encoding(this); 8536 if (!VM_Version::supports_avx512vl()) { 8537 vlen_enc = Assembler::AVX_512bit; 8538 } 8539 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8540 %} 8541 ins_pipe( pipe_slow ); 8542 %} 8543 8544 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8545 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8546 VM_Version::supports_avx512bw()); 8547 match(Set dst (VectorRearrange src shuffle)); 8548 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8549 ins_encode %{ 8550 int vlen_enc = vector_length_encoding(this); 8551 if (!VM_Version::supports_avx512vl()) { 8552 vlen_enc = Assembler::AVX_512bit; 8553 } 8554 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8555 %} 8556 ins_pipe( pipe_slow ); 8557 %} 8558 8559 // LoadShuffle/Rearrange for Integer and Float 8560 8561 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8562 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8563 Matcher::vector_length(n) == 4 && UseAVX == 0); 8564 match(Set dst (VectorLoadShuffle src)); 8565 effect(TEMP dst, TEMP vtmp); 8566 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8567 ins_encode %{ 8568 assert(UseSSE >= 4, "required"); 8569 8570 // Create a byte shuffle mask from int shuffle mask 8571 // only byte shuffle instruction available on these platforms 8572 8573 // Duplicate and multiply each shuffle by 4 8574 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8575 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8576 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8577 __ psllw($vtmp$$XMMRegister, 2); 8578 8579 // Duplicate again to create 4 copies of byte index 8580 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8581 __ psllw($dst$$XMMRegister, 8); 8582 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8583 8584 // Add 3,2,1,0 to get alternate byte index 8585 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8586 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8587 %} 8588 ins_pipe( pipe_slow ); 8589 %} 8590 8591 instruct rearrangeI(vec dst, vec shuffle) %{ 8592 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8593 UseAVX == 0); 8594 match(Set dst (VectorRearrange dst shuffle)); 8595 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8596 ins_encode %{ 8597 assert(UseSSE >= 4, "required"); 8598 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8599 %} 8600 ins_pipe( pipe_slow ); 8601 %} 8602 8603 instruct loadShuffleI_avx(vec dst, vec src) %{ 8604 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8605 UseAVX > 0); 8606 match(Set dst (VectorLoadShuffle src)); 8607 format %{ "vector_load_shuffle $dst, $src" %} 8608 ins_encode %{ 8609 int vlen_enc = vector_length_encoding(this); 8610 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8611 %} 8612 ins_pipe( pipe_slow ); 8613 %} 8614 8615 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8616 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8617 UseAVX > 0); 8618 match(Set dst (VectorRearrange src shuffle)); 8619 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8620 ins_encode %{ 8621 int vlen_enc = vector_length_encoding(this); 8622 BasicType bt = Matcher::vector_element_basic_type(this); 8623 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8624 %} 8625 ins_pipe( pipe_slow ); 8626 %} 8627 8628 // LoadShuffle/Rearrange for Long and Double 8629 8630 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8631 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8632 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8633 match(Set dst (VectorLoadShuffle src)); 8634 effect(TEMP dst, TEMP vtmp); 8635 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8636 ins_encode %{ 8637 assert(UseAVX >= 2, "required"); 8638 8639 int vlen_enc = vector_length_encoding(this); 8640 // Create a double word shuffle mask from long shuffle mask 8641 // only double word shuffle instruction available on these platforms 8642 8643 // Multiply each shuffle by two to get double word index 8644 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8645 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8646 8647 // Duplicate each double word shuffle 8648 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8649 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8650 8651 // Add one to get alternate double word index 8652 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8653 %} 8654 ins_pipe( pipe_slow ); 8655 %} 8656 8657 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8658 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8659 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8660 match(Set dst (VectorRearrange src shuffle)); 8661 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8662 ins_encode %{ 8663 assert(UseAVX >= 2, "required"); 8664 8665 int vlen_enc = vector_length_encoding(this); 8666 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8667 %} 8668 ins_pipe( pipe_slow ); 8669 %} 8670 8671 instruct loadShuffleL_evex(vec dst, vec src) %{ 8672 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8673 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8674 match(Set dst (VectorLoadShuffle src)); 8675 format %{ "vector_load_shuffle $dst, $src" %} 8676 ins_encode %{ 8677 assert(UseAVX > 2, "required"); 8678 8679 int vlen_enc = vector_length_encoding(this); 8680 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8681 %} 8682 ins_pipe( pipe_slow ); 8683 %} 8684 8685 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8686 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8687 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8688 match(Set dst (VectorRearrange src shuffle)); 8689 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8690 ins_encode %{ 8691 assert(UseAVX > 2, "required"); 8692 8693 int vlen_enc = vector_length_encoding(this); 8694 if (vlen_enc == Assembler::AVX_128bit) { 8695 vlen_enc = Assembler::AVX_256bit; 8696 } 8697 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8698 %} 8699 ins_pipe( pipe_slow ); 8700 %} 8701 8702 // --------------------------------- FMA -------------------------------------- 8703 // a * b + c 8704 8705 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8706 match(Set c (FmaVF c (Binary a b))); 8707 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8708 ins_cost(150); 8709 ins_encode %{ 8710 assert(UseFMA, "not enabled"); 8711 int vlen_enc = vector_length_encoding(this); 8712 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8718 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8719 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8720 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8721 ins_cost(150); 8722 ins_encode %{ 8723 assert(UseFMA, "not 
enabled"); 8724 int vlen_enc = vector_length_encoding(this); 8725 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8726 %} 8727 ins_pipe( pipe_slow ); 8728 %} 8729 8730 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8731 match(Set c (FmaVD c (Binary a b))); 8732 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8733 ins_cost(150); 8734 ins_encode %{ 8735 assert(UseFMA, "not enabled"); 8736 int vlen_enc = vector_length_encoding(this); 8737 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8738 %} 8739 ins_pipe( pipe_slow ); 8740 %} 8741 8742 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8743 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8744 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8745 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8746 ins_cost(150); 8747 ins_encode %{ 8748 assert(UseFMA, "not enabled"); 8749 int vlen_enc = vector_length_encoding(this); 8750 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8751 %} 8752 ins_pipe( pipe_slow ); 8753 %} 8754 8755 // --------------------------------- Vector Multiply Add -------------------------------------- 8756 8757 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8758 predicate(UseAVX == 0); 8759 match(Set dst (MulAddVS2VI dst src1)); 8760 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8761 ins_encode %{ 8762 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8763 %} 8764 ins_pipe( pipe_slow ); 8765 %} 8766 8767 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8768 predicate(UseAVX > 0); 8769 match(Set dst (MulAddVS2VI src1 src2)); 8770 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8771 ins_encode %{ 8772 int vlen_enc = vector_length_encoding(this); 8773 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8774 %} 8775 ins_pipe( pipe_slow ); 8776 %} 8777 8778 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8779 8780 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8781 predicate(VM_Version::supports_avx512_vnni()); 8782 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8783 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 8784 ins_encode %{ 8785 assert(UseAVX > 2, "required"); 8786 int vlen_enc = vector_length_encoding(this); 8787 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8788 %} 8789 ins_pipe( pipe_slow ); 8790 ins_cost(10); 8791 %} 8792 8793 // --------------------------------- PopCount -------------------------------------- 8794 8795 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 8796 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8797 match(Set dst (PopCountVI src)); 8798 match(Set dst (PopCountVL src)); 8799 format %{ "vector_popcount_integral $dst, $src" %} 8800 ins_encode %{ 8801 int opcode = this->ideal_Opcode(); 8802 int vlen_enc = vector_length_encoding(this, $src); 8803 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8804 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 8805 %} 8806 ins_pipe( pipe_slow ); 8807 %} 8808 8809 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 8810 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8811 match(Set dst (PopCountVI src mask)); 8812 match(Set dst (PopCountVL src mask)); 8813 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 8814 ins_encode %{ 8815 int vlen_enc = vector_length_encoding(this, $src); 8816 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8817 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8818 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 8819 %} 8820 ins_pipe( pipe_slow ); 8821 %} 8822 8823 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 8824 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8825 match(Set dst (PopCountVI src)); 8826 match(Set dst (PopCountVL src)); 8827 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 8828 format %{ "vector_popcount_integral $dst, $src\t! 
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 8829 ins_encode %{ 8830 int opcode = this->ideal_Opcode(); 8831 int vlen_enc = vector_length_encoding(this, $src); 8832 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8833 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8834 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 8840 8841 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 8842 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 8843 Matcher::vector_length_in_bytes(n->in(1)))); 8844 match(Set dst (CountTrailingZerosV src)); 8845 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 8846 ins_cost(400); 8847 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 8848 ins_encode %{ 8849 int vlen_enc = vector_length_encoding(this, $src); 8850 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8851 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 8852 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8853 %} 8854 ins_pipe( pipe_slow ); 8855 %} 8856 8857 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8858 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 8859 VM_Version::supports_avx512cd() && 8860 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 8861 match(Set dst (CountTrailingZerosV src)); 8862 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 8863 ins_cost(400); 8864 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 8865 ins_encode %{ 8866 int vlen_enc = vector_length_encoding(this, $src); 8867 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8868 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8869 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8870 %} 8871 ins_pipe( pipe_slow ); 8872 %} 8873 8874 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 8875 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 8876 match(Set dst (CountTrailingZerosV src)); 8877 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 8878 ins_cost(400); 8879 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 8880 ins_encode %{ 8881 int vlen_enc = vector_length_encoding(this, $src); 8882 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8883 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8884 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 8885 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 8886 %} 8887 ins_pipe( pipe_slow ); 8888 %} 8889 8890 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8891 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 8892 match(Set dst (CountTrailingZerosV src)); 8893 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
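  // Why this note is here: unlike the EVEX rules above, this AVX fallback has no
  // vector lzcnt/popcount instruction to lean on, so the count is synthesized by the
  // C2_MacroAssembler helper invoked below. As a hedged scalar model only (not the
  // exact lowering), the per-element result the rule must produce can be written with
  // the classic bit identity tzcnt(x) == popcount(~x & (x - 1)) for x != 0:
  //
  //   // trailing zeros of x in an nbits-wide lane; nbits is returned for x == 0
  //   static int tzcnt_model(uint64_t x, int nbits) {
  //     return (x == 0) ? nbits : __builtin_popcountll(~x & (x - 1));
  //   }
  //
  // (tzcnt_model and nbits are illustrative names, not HotSpot identifiers.)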
8894 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8895 ins_encode %{ 8896 int vlen_enc = vector_length_encoding(this, $src); 8897 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8898 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8899 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8900 %} 8901 ins_pipe( pipe_slow ); 8902 %} 8903 8904 8905 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 8906 8907 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 8908 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 8909 effect(TEMP dst); 8910 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8911 ins_encode %{ 8912 int vector_len = vector_length_encoding(this); 8913 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8914 %} 8915 ins_pipe( pipe_slow ); 8916 %} 8917 8918 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8919 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 8920 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8921 effect(TEMP dst); 8922 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8923 ins_encode %{ 8924 int vector_len = vector_length_encoding(this); 8925 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8926 %} 8927 ins_pipe( pipe_slow ); 8928 %} 8929 8930 // --------------------------------- Rotation Operations ---------------------------------- 8931 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8932 match(Set dst (RotateLeftV src shift)); 8933 match(Set dst (RotateRightV src shift)); 8934 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 8935 ins_encode %{ 8936 int opcode = this->ideal_Opcode(); 8937 int vector_len = vector_length_encoding(this); 8938 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8939 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 8940 %} 8941 ins_pipe( pipe_slow ); 8942 %} 8943 8944 instruct vprorate(vec dst, vec src, vec shift) %{ 8945 match(Set dst (RotateLeftV src shift)); 8946 match(Set dst (RotateRightV src shift)); 8947 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 8948 ins_encode %{ 8949 int opcode = this->ideal_Opcode(); 8950 int vector_len = vector_length_encoding(this); 8951 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8952 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8953 %} 8954 ins_pipe( pipe_slow ); 8955 %} 8956 8957 // ---------------------------------- Masked Operations ------------------------------------ 8958 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 8959 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 8960 match(Set dst (LoadVectorMasked mem mask)); 8961 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 8962 ins_encode %{ 8963 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8964 int vlen_enc = vector_length_encoding(this); 8965 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 8966 %} 8967 ins_pipe( pipe_slow ); 8968 %} 8969 8970 8971 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 8972 predicate(n->in(3)->bottom_type()->isa_vectmask()); 8973 match(Set dst (LoadVectorMasked mem mask)); 8974 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8975 ins_encode %{ 8976 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8977 int vector_len = vector_length_encoding(this); 8978 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 8979 %} 8980 ins_pipe( pipe_slow ); 8981 %} 8982 8983 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 8984 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8985 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8986 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8987 ins_encode %{ 8988 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8989 int vlen_enc = vector_length_encoding(src_node); 8990 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8991 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8992 %} 8993 ins_pipe( pipe_slow ); 8994 %} 8995 8996 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 8997 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8998 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8999 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9000 ins_encode %{ 9001 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9002 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9003 int vlen_enc = vector_length_encoding(src_node); 9004 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9005 %} 9006 ins_pipe( pipe_slow ); 9007 %} 9008 9009 #ifdef _LP64 9010 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9011 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9012 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9013 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9014 ins_encode %{ 9015 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9016 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9017 9018 Label DONE; 9019 int vlen_enc = vector_length_encoding(this, $src1); 9020 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9021 9022 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9023 __ mov64($dst$$Register, -1L); 9024 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9025 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9026 __ jccb(Assembler::carrySet, DONE); 9027 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9028 __ notq($dst$$Register); 9029 __ tzcntq($dst$$Register, $dst$$Register); 9030 __ bind(DONE); 9031 %} 9032 ins_pipe( pipe_slow ); 9033 %} 9034 9035 9036 instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 9037 match(Set dst (VectorMaskGen len)); 9038 effect(TEMP temp); 9039 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9040 ins_encode %{ 9041 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9042 %} 9043 ins_pipe( pipe_slow ); 9044 %} 9045 9046 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9047 match(Set dst (VectorMaskGen len)); 9048 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9049 effect(TEMP temp); 9050 ins_encode %{ 9051 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9052 __ kmovql($dst$$KRegister, $temp$$Register); 9053 %} 9054 ins_pipe( pipe_slow ); 9055 %} 9056 9057 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9058 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9059 match(Set dst (VectorMaskToLong mask)); 9060 effect(TEMP dst, KILL cr); 9061 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9062 ins_encode %{ 9063 int opcode = this->ideal_Opcode(); 9064 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9065 int mask_len = Matcher::vector_length(this, $mask); 9066 int mask_size = mask_len * type2aelembytes(mbt); 9067 int vlen_enc = vector_length_encoding(this, $mask); 9068 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9069 $dst$$Register, mask_len, mask_size, vlen_enc); 9070 %} 9071 ins_pipe( pipe_slow ); 9072 %} 9073 9074 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9075 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9076 match(Set dst (VectorMaskToLong mask)); 9077 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9078 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9079 ins_encode %{ 9080 int opcode = this->ideal_Opcode(); 9081 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9082 int mask_len = Matcher::vector_length(this, $mask); 9083 int vlen_enc = vector_length_encoding(this, $mask); 9084 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9085 $dst$$Register, mask_len, mbt, vlen_enc); 9086 %} 9087 ins_pipe( pipe_slow ); 9088 %} 9089 9090 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9091 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9092 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9093 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9094 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9095 ins_encode %{ 9096 int opcode = this->ideal_Opcode(); 9097 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9098 int mask_len = Matcher::vector_length(this, $mask); 9099 int vlen_enc = vector_length_encoding(this, $mask); 9100 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9101 $dst$$Register, mask_len, mbt, vlen_enc); 9102 %} 9103 ins_pipe( pipe_slow ); 9104 %} 9105 9106 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9107 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9108 match(Set dst (VectorMaskTrueCount mask)); 9109 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9110 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9111 ins_encode %{ 9112 int opcode = this->ideal_Opcode(); 9113 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9114 int mask_len = Matcher::vector_length(this, $mask); 9115 int mask_size = mask_len * type2aelembytes(mbt); 9116 int vlen_enc = vector_length_encoding(this, $mask); 9117 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9118 $tmp$$Register, mask_len, mask_size, vlen_enc); 9119 %} 9120 ins_pipe( pipe_slow ); 9121 %} 9122 9123 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9124 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9125 match(Set dst (VectorMaskTrueCount mask)); 9126 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9127 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9128 ins_encode %{ 9129 int opcode = this->ideal_Opcode(); 9130 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9131 int mask_len = Matcher::vector_length(this, $mask); 9132 int vlen_enc = vector_length_encoding(this, $mask); 9133 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9134 $tmp$$Register, mask_len, mbt, vlen_enc); 9135 %} 9136 ins_pipe( pipe_slow ); 9137 %} 9138 9139 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9140 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9141 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9142 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9143 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9144 ins_encode %{ 9145 int opcode = this->ideal_Opcode(); 9146 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9147 int mask_len = Matcher::vector_length(this, $mask); 9148 int vlen_enc = vector_length_encoding(this, $mask); 9149 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9150 $tmp$$Register, mask_len, mbt, vlen_enc); 9151 %} 9152 ins_pipe( pipe_slow ); 9153 %} 9154 9155 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9156 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9157 match(Set dst (VectorMaskFirstTrue mask)); 9158 match(Set dst (VectorMaskLastTrue mask)); 9159 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9160 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9161 ins_encode %{ 9162 int opcode = this->ideal_Opcode(); 9163 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9164 int mask_len = Matcher::vector_length(this, $mask); 9165 int mask_size = mask_len * type2aelembytes(mbt); 9166 int vlen_enc = vector_length_encoding(this, $mask); 9167 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9168 $tmp$$Register, mask_len, mask_size, vlen_enc); 9169 %} 9170 ins_pipe( pipe_slow ); 9171 %} 9172 9173 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9174 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9175 match(Set dst (VectorMaskFirstTrue mask)); 9176 match(Set dst (VectorMaskLastTrue mask)); 9177 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9178 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9179 ins_encode %{ 9180 int opcode = this->ideal_Opcode(); 9181 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9182 int mask_len = Matcher::vector_length(this, $mask); 9183 int vlen_enc = vector_length_encoding(this, $mask); 9184 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9185 $tmp$$Register, mask_len, mbt, vlen_enc); 9186 %} 9187 ins_pipe( pipe_slow ); 9188 %} 9189 9190 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9191 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9192 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9193 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9194 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9195 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9196 ins_encode %{ 9197 int opcode = this->ideal_Opcode(); 9198 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9199 int mask_len = Matcher::vector_length(this, $mask); 9200 int vlen_enc = vector_length_encoding(this, $mask); 9201 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9202 $tmp$$Register, mask_len, mbt, vlen_enc); 9203 %} 9204 ins_pipe( pipe_slow ); 9205 %} 9206 9207 // --------------------------------- Compress/Expand Operations --------------------------- 9208 9209 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9210 match(Set dst (CompressV src mask)); 9211 match(Set dst (ExpandV src mask)); 9212 format %{ "vector_compress_expand $dst, $src, $mask" %} 9213 ins_encode %{ 9214 int opcode = this->ideal_Opcode(); 9215 int vector_len = vector_length_encoding(this); 9216 BasicType bt = Matcher::vector_element_basic_type(this); 9217 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9218 %} 9219 ins_pipe( pipe_slow ); 9220 %} 9221 9222 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9223 match(Set dst (CompressM mask)); 9224 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9225 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9226 ins_encode %{ 9227 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9228 int mask_len = Matcher::vector_length(this); 9229 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9230 %} 9231 ins_pipe( pipe_slow ); 9232 %} 9233 9234 #endif // _LP64 9235 9236 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9237 9238 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9239 predicate(!VM_Version::supports_gfni()); 9240 match(Set dst (ReverseV src)); 9241 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9242 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9243 ins_encode %{ 9244 int vec_enc = vector_length_encoding(this); 9245 BasicType bt = Matcher::vector_element_basic_type(this); 9246 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9247 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9248 %} 9249 ins_pipe( pipe_slow ); 9250 %} 9251 9252 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9253 predicate(VM_Version::supports_gfni()); 9254 match(Set dst (ReverseV src)); 9255 effect(TEMP dst, TEMP xtmp); 9256 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9257 ins_encode %{ 9258 int vec_enc = vector_length_encoding(this); 9259 BasicType bt = Matcher::vector_element_basic_type(this); 9260 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9261 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9262 $xtmp$$XMMRegister); 9263 %} 9264 ins_pipe( pipe_slow ); 9265 %} 9266 9267 instruct vreverse_byte_reg(vec dst, vec src) %{ 9268 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9269 match(Set dst (ReverseBytesV src)); 9270 effect(TEMP dst); 9271 format %{ "vector_reverse_byte $dst, $src" %} 9272 ins_encode %{ 9273 int vec_enc = vector_length_encoding(this); 9274 BasicType bt = Matcher::vector_element_basic_type(this); 9275 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9276 %} 9277 ins_pipe( pipe_slow ); 9278 %} 9279 9280 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9281 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9282 match(Set dst (ReverseBytesV src)); 9283 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9284 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9285 ins_encode %{ 9286 int vec_enc = vector_length_encoding(this); 9287 BasicType bt = Matcher::vector_element_basic_type(this); 9288 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9289 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9290 %} 9291 ins_pipe( pipe_slow ); 9292 %} 9293 9294 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9295 9296 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9297 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9298 Matcher::vector_length_in_bytes(n->in(1)))); 9299 match(Set dst (CountLeadingZerosV src)); 9300 format %{ "vector_count_leading_zeros $dst, $src" %} 9301 ins_encode %{ 9302 int vlen_enc = vector_length_encoding(this, $src); 9303 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9304 __ 
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9305 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9306 %} 9307 ins_pipe( pipe_slow ); 9308 %} 9309 9310 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9311 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9312 Matcher::vector_length_in_bytes(n->in(1)))); 9313 match(Set dst (CountLeadingZerosV src mask)); 9314 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9315 ins_encode %{ 9316 int vlen_enc = vector_length_encoding(this, $src); 9317 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9318 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9319 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9320 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9321 %} 9322 ins_pipe( pipe_slow ); 9323 %} 9324 9325 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9326 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9327 VM_Version::supports_avx512cd() && 9328 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9329 match(Set dst (CountLeadingZerosV src)); 9330 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9331 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9332 ins_encode %{ 9333 int vlen_enc = vector_length_encoding(this, $src); 9334 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9335 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9336 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9337 %} 9338 ins_pipe( pipe_slow ); 9339 %} 9340 9341 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9342 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9343 match(Set dst (CountLeadingZerosV src)); 9344 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9345 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9346 ins_encode %{ 9347 int vlen_enc = vector_length_encoding(this, $src); 9348 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9349 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9350 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9351 $rtmp$$Register, true, vlen_enc); 9352 %} 9353 ins_pipe( pipe_slow ); 9354 %} 9355 9356 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9357 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9358 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9359 match(Set dst (CountLeadingZerosV src)); 9360 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9361 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9362 ins_encode %{ 9363 int vlen_enc = vector_length_encoding(this, $src); 9364 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9365 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9366 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9367 %} 9368 ins_pipe( pipe_slow ); 9369 %} 9370 9371 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9372 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9373 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9374 match(Set dst (CountLeadingZerosV src)); 9375 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9376 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9377 ins_encode %{ 9378 int vlen_enc = vector_length_encoding(this, $src); 9379 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9380 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9381 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9382 %} 9383 ins_pipe( pipe_slow ); 9384 %} 9385 9386 // ---------------------------------- Vector Masked Operations ------------------------------------ 9387 9388 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9389 match(Set dst (AddVB (Binary dst src2) mask)); 9390 match(Set dst (AddVS (Binary dst src2) mask)); 9391 match(Set dst (AddVI (Binary dst src2) mask)); 9392 match(Set dst (AddVL (Binary dst src2) mask)); 9393 match(Set dst (AddVF (Binary dst src2) mask)); 9394 match(Set dst (AddVD (Binary dst src2) mask)); 9395 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9396 ins_encode %{ 9397 int vlen_enc = vector_length_encoding(this); 9398 BasicType bt = Matcher::vector_element_basic_type(this); 9399 int opc = this->ideal_Opcode(); 9400 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9401 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9402 %} 9403 ins_pipe( pipe_slow ); 9404 %} 9405 9406 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9407 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9408 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9409 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9410 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9411 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9412 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9413 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9414 ins_encode %{ 9415 int vlen_enc = vector_length_encoding(this); 9416 BasicType bt = Matcher::vector_element_basic_type(this); 9417 int opc = this->ideal_Opcode(); 9418 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9419 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9420 %} 9421 ins_pipe( pipe_slow ); 9422 %} 9423 9424 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9425 match(Set dst (XorV (Binary dst src2) mask)); 9426 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9427 ins_encode %{ 9428 int vlen_enc = vector_length_encoding(this); 9429 BasicType bt = Matcher::vector_element_basic_type(this); 9430 int opc = this->ideal_Opcode(); 9431 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9432 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9438 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9439 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9440 ins_encode %{ 9441 int vlen_enc = vector_length_encoding(this); 9442 BasicType bt = Matcher::vector_element_basic_type(this); 9443 int opc = this->ideal_Opcode(); 9444 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9445 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9446 %} 9447 ins_pipe( pipe_slow ); 9448 %} 9449 9450 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9451 match(Set dst (OrV (Binary dst src2) mask)); 9452 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9453 ins_encode %{ 9454 int vlen_enc = vector_length_encoding(this); 9455 BasicType bt = Matcher::vector_element_basic_type(this); 9456 int opc = this->ideal_Opcode(); 9457 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9458 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9459 %} 9460 ins_pipe( pipe_slow ); 9461 %} 9462 9463 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9464 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9465 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9466 ins_encode %{ 9467 int vlen_enc = vector_length_encoding(this); 9468 BasicType bt = Matcher::vector_element_basic_type(this); 9469 int opc = this->ideal_Opcode(); 9470 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9471 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9472 %} 9473 ins_pipe( pipe_slow ); 9474 %} 9475 9476 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9477 match(Set dst (AndV (Binary dst src2) mask)); 9478 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9479 ins_encode %{ 9480 int vlen_enc = vector_length_encoding(this); 9481 BasicType bt = Matcher::vector_element_basic_type(this); 9482 int opc = this->ideal_Opcode(); 9483 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9484 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9490 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9491 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9492 ins_encode %{ 9493 int vlen_enc = vector_length_encoding(this); 9494 BasicType bt = Matcher::vector_element_basic_type(this); 9495 int opc = this->ideal_Opcode(); 9496 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9497 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9498 %} 9499 ins_pipe( pipe_slow ); 9500 %} 9501 9502 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9503 match(Set dst (SubVB (Binary dst src2) mask)); 9504 match(Set dst (SubVS (Binary dst src2) mask)); 9505 match(Set dst (SubVI (Binary dst src2) mask)); 9506 match(Set dst (SubVL (Binary dst src2) mask)); 9507 match(Set dst (SubVF (Binary dst src2) mask)); 9508 match(Set dst (SubVD (Binary dst src2) mask)); 9509 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9510 ins_encode %{ 9511 int vlen_enc = vector_length_encoding(this); 9512 BasicType bt = Matcher::vector_element_basic_type(this); 9513 int opc = this->ideal_Opcode(); 9514 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9515 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9516 %} 9517 ins_pipe( pipe_slow ); 9518 %} 9519 9520 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9521 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9522 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9523 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9524 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9525 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9526 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9527 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9528 ins_encode %{ 9529 int vlen_enc = vector_length_encoding(this); 9530 BasicType bt = Matcher::vector_element_basic_type(this); 9531 int opc = this->ideal_Opcode(); 9532 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9533 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9534 %} 9535 ins_pipe( pipe_slow ); 9536 %} 9537 9538 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9539 match(Set dst (MulVS (Binary dst src2) mask)); 9540 match(Set dst (MulVI (Binary dst src2) mask)); 9541 match(Set dst (MulVL (Binary dst src2) mask)); 9542 match(Set dst (MulVF (Binary dst src2) mask)); 9543 match(Set dst (MulVD (Binary dst src2) mask)); 9544 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9545 ins_encode %{ 9546 int vlen_enc = vector_length_encoding(this); 9547 BasicType bt = Matcher::vector_element_basic_type(this); 9548 int opc = this->ideal_Opcode(); 9549 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9550 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9551 %} 9552 ins_pipe( pipe_slow ); 9553 %} 9554 9555 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9556 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9557 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9558 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9559 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9560 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9561 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9562 ins_encode %{ 9563 int vlen_enc = vector_length_encoding(this); 9564 BasicType bt = Matcher::vector_element_basic_type(this); 9565 int opc = this->ideal_Opcode(); 9566 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9567 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9568 %} 9569 ins_pipe( pipe_slow ); 9570 %} 9571 9572 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9573 match(Set dst (SqrtVF dst mask)); 9574 match(Set dst (SqrtVD dst mask)); 9575 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9576 ins_encode %{ 9577 int vlen_enc = vector_length_encoding(this); 9578 BasicType bt = Matcher::vector_element_basic_type(this); 9579 int opc = this->ideal_Opcode(); 9580 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9581 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9582 %} 9583 ins_pipe( pipe_slow ); 9584 %} 9585 9586 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9587 match(Set dst (DivVF (Binary dst src2) mask)); 9588 match(Set dst (DivVD (Binary dst src2) mask)); 9589 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9590 ins_encode %{ 9591 int vlen_enc = vector_length_encoding(this); 9592 BasicType bt = Matcher::vector_element_basic_type(this); 9593 int opc = this->ideal_Opcode(); 9594 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9595 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9596 %} 9597 ins_pipe( pipe_slow ); 9598 %} 9599 9600 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9601 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9602 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9603 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9604 ins_encode %{ 9605 int vlen_enc = vector_length_encoding(this); 9606 BasicType bt = Matcher::vector_element_basic_type(this); 9607 int opc = this->ideal_Opcode(); 9608 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9609 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9610 %} 9611 ins_pipe( pipe_slow ); 9612 %} 9613 9614 9615 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9616 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9617 match(Set dst (RotateRightV (Binary dst shift) mask)); 9618 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9619 ins_encode %{ 9620 int vlen_enc = vector_length_encoding(this); 9621 BasicType bt = Matcher::vector_element_basic_type(this); 9622 int opc = this->ideal_Opcode(); 9623 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9624 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9625 %} 9626 ins_pipe( pipe_slow ); 9627 %} 9628 9629 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9630 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9631 match(Set dst (RotateRightV (Binary dst src2) mask)); 9632 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9633 ins_encode %{ 9634 int vlen_enc = vector_length_encoding(this); 9635 BasicType bt = Matcher::vector_element_basic_type(this); 9636 int opc = this->ideal_Opcode(); 9637 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9638 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9639 %} 9640 ins_pipe( pipe_slow ); 9641 %} 9642 9643 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9644 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9645 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9646 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9647 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9648 ins_encode %{ 9649 int vlen_enc = vector_length_encoding(this); 9650 BasicType bt = Matcher::vector_element_basic_type(this); 9651 int opc = this->ideal_Opcode(); 9652 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9653 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9654 %} 9655 ins_pipe( pipe_slow ); 9656 %} 9657 9658 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9659 predicate(!n->as_ShiftV()->is_var_shift()); 9660 match(Set dst (LShiftVS (Binary dst src2) mask)); 9661 match(Set dst (LShiftVI (Binary dst src2) mask)); 9662 match(Set dst (LShiftVL (Binary dst src2) mask)); 9663 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9664 ins_encode %{ 9665 int vlen_enc = vector_length_encoding(this); 9666 BasicType bt = Matcher::vector_element_basic_type(this); 9667 int opc = this->ideal_Opcode(); 9668 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9669 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9670 %} 9671 ins_pipe( pipe_slow ); 9672 %} 9673 9674 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9675 predicate(n->as_ShiftV()->is_var_shift()); 9676 match(Set dst (LShiftVS (Binary dst src2) mask)); 9677 match(Set dst (LShiftVI (Binary dst src2) mask)); 9678 match(Set dst (LShiftVL (Binary dst src2) mask)); 9679 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9680 ins_encode %{ 9681 int vlen_enc = vector_length_encoding(this); 9682 BasicType bt = Matcher::vector_element_basic_type(this); 9683 int opc = this->ideal_Opcode(); 9684 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9685 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9686 %} 9687 ins_pipe( pipe_slow ); 9688 %} 9689 9690 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9691 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9692 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9693 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9694 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9695 ins_encode %{ 9696 int vlen_enc = vector_length_encoding(this); 9697 BasicType bt = Matcher::vector_element_basic_type(this); 9698 int opc = this->ideal_Opcode(); 9699 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9700 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9701 %} 9702 ins_pipe( pipe_slow ); 9703 %} 9704 9705 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9706 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9707 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9708 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9709 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! 
rshift masked operation" %} 9710 ins_encode %{ 9711 int vlen_enc = vector_length_encoding(this); 9712 BasicType bt = Matcher::vector_element_basic_type(this); 9713 int opc = this->ideal_Opcode(); 9714 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9715 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9716 %} 9717 ins_pipe( pipe_slow ); 9718 %} 9719 9720 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9721 predicate(!n->as_ShiftV()->is_var_shift()); 9722 match(Set dst (RShiftVS (Binary dst src2) mask)); 9723 match(Set dst (RShiftVI (Binary dst src2) mask)); 9724 match(Set dst (RShiftVL (Binary dst src2) mask)); 9725 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9726 ins_encode %{ 9727 int vlen_enc = vector_length_encoding(this); 9728 BasicType bt = Matcher::vector_element_basic_type(this); 9729 int opc = this->ideal_Opcode(); 9730 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9731 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9732 %} 9733 ins_pipe( pipe_slow ); 9734 %} 9735 9736 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9737 predicate(n->as_ShiftV()->is_var_shift()); 9738 match(Set dst (RShiftVS (Binary dst src2) mask)); 9739 match(Set dst (RShiftVI (Binary dst src2) mask)); 9740 match(Set dst (RShiftVL (Binary dst src2) mask)); 9741 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9742 ins_encode %{ 9743 int vlen_enc = vector_length_encoding(this); 9744 BasicType bt = Matcher::vector_element_basic_type(this); 9745 int opc = this->ideal_Opcode(); 9746 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9747 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9748 %} 9749 ins_pipe( pipe_slow ); 9750 %} 9751 9752 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9753 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9754 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9755 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9756 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9757 ins_encode %{ 9758 int vlen_enc = vector_length_encoding(this); 9759 BasicType bt = Matcher::vector_element_basic_type(this); 9760 int opc = this->ideal_Opcode(); 9761 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9762 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9763 %} 9764 ins_pipe( pipe_slow ); 9765 %} 9766 9767 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9768 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9769 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9770 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9771 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 9772 ins_encode %{ 9773 int vlen_enc = vector_length_encoding(this); 9774 BasicType bt = Matcher::vector_element_basic_type(this); 9775 int opc = this->ideal_Opcode(); 9776 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9777 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9778 %} 9779 ins_pipe( pipe_slow ); 9780 %} 9781 9782 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9783 predicate(!n->as_ShiftV()->is_var_shift()); 9784 match(Set dst (URShiftVS (Binary dst src2) mask)); 9785 match(Set dst (URShiftVI (Binary dst src2) mask)); 9786 match(Set dst (URShiftVL (Binary dst src2) mask)); 9787 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9788 ins_encode %{ 9789 int vlen_enc = vector_length_encoding(this); 9790 BasicType bt = Matcher::vector_element_basic_type(this); 9791 int opc = this->ideal_Opcode(); 9792 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9793 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9794 %} 9795 ins_pipe( pipe_slow ); 9796 %} 9797 9798 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9799 predicate(n->as_ShiftV()->is_var_shift()); 9800 match(Set dst (URShiftVS (Binary dst src2) mask)); 9801 match(Set dst (URShiftVI (Binary dst src2) mask)); 9802 match(Set dst (URShiftVL (Binary dst src2) mask)); 9803 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9804 ins_encode %{ 9805 int vlen_enc = vector_length_encoding(this); 9806 BasicType bt = Matcher::vector_element_basic_type(this); 9807 int opc = this->ideal_Opcode(); 9808 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9809 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9810 %} 9811 ins_pipe( pipe_slow ); 9812 %} 9813 9814 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9815 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9816 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9817 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9818 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9819 ins_encode %{ 9820 int vlen_enc = vector_length_encoding(this); 9821 BasicType bt = Matcher::vector_element_basic_type(this); 9822 int opc = this->ideal_Opcode(); 9823 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9824 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9825 %} 9826 ins_pipe( pipe_slow ); 9827 %} 9828 9829 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9830 match(Set dst (MaxV (Binary dst src2) mask)); 9831 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9832 ins_encode %{ 9833 int vlen_enc = vector_length_encoding(this); 9834 BasicType bt = Matcher::vector_element_basic_type(this); 9835 int opc = this->ideal_Opcode(); 9836 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9837 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9838 %} 9839 ins_pipe( pipe_slow ); 9840 %} 9841 9842 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9843 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9844 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9845 ins_encode %{ 9846 int vlen_enc = vector_length_encoding(this); 9847 BasicType bt = Matcher::vector_element_basic_type(this); 9848 int opc = this->ideal_Opcode(); 9849 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9850 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9851 %} 9852 ins_pipe( pipe_slow ); 9853 %} 9854 9855 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9856 match(Set dst (MinV (Binary dst src2) mask)); 9857 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9858 ins_encode %{ 9859 int vlen_enc = vector_length_encoding(this); 9860 BasicType bt = Matcher::vector_element_basic_type(this); 9861 int opc = this->ideal_Opcode(); 9862 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9863 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9864 %} 9865 ins_pipe( pipe_slow ); 9866 %} 9867 9868 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9869 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9870 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9871 ins_encode %{ 9872 int vlen_enc = vector_length_encoding(this); 9873 BasicType bt = Matcher::vector_element_basic_type(this); 9874 int opc = this->ideal_Opcode(); 9875 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9876 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9877 %} 9878 ins_pipe( pipe_slow ); 9879 %} 9880 9881 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9882 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9883 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9884 ins_encode %{ 9885 int vlen_enc = vector_length_encoding(this); 9886 BasicType bt = Matcher::vector_element_basic_type(this); 9887 int opc = this->ideal_Opcode(); 9888 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9889 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9890 %} 9891 ins_pipe( pipe_slow ); 9892 %} 9893 9894 instruct vabs_masked(vec dst, kReg mask) %{ 9895 match(Set dst (AbsVB dst mask)); 9896 match(Set dst (AbsVS dst mask)); 9897 match(Set dst (AbsVI dst mask)); 9898 match(Set dst (AbsVL dst mask)); 9899 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9900 ins_encode %{ 9901 int vlen_enc = vector_length_encoding(this); 9902 BasicType bt = Matcher::vector_element_basic_type(this); 9903 int opc = this->ideal_Opcode(); 9904 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9905 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9906 %} 9907 ins_pipe( pipe_slow ); 9908 %} 9909 9910 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9911 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9912 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9913 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9914 ins_encode %{ 9915 assert(UseFMA, "Needs FMA instructions support."); 9916 int vlen_enc = vector_length_encoding(this); 9917 BasicType bt = Matcher::vector_element_basic_type(this); 9918 int opc = this->ideal_Opcode(); 9919 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9920 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9921 %} 9922 ins_pipe( pipe_slow ); 9923 %} 9924 9925 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9926 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9927 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9928 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9929 ins_encode %{ 9930 assert(UseFMA, "Needs FMA instructions support."); 9931 int vlen_enc = vector_length_encoding(this); 9932 BasicType bt = Matcher::vector_element_basic_type(this); 9933 int opc = this->ideal_Opcode(); 9934 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9935 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9936 %} 9937 ins_pipe( pipe_slow ); 9938 %} 9939 9940 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 9941 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 9942 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 9943 ins_encode %{ 9944 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 9945 int vlen_enc = vector_length_encoding(this, $src1); 9946 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 9947 9948 // Comparison i 9949 switch (src1_elem_bt) { 9950 case T_BYTE: { 9951 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9952 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9953 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9954 break; 9955 } 9956 case T_SHORT: { 9957 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9958 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9959 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9960 break; 9961 } 9962 case T_INT: { 9963 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9964 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9965 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9966 break; 9967 } 9968 case T_LONG: { 9969 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9970 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9971 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9972 break; 9973 } 9974 case T_FLOAT: { 9975 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9976 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9977 break; 9978 } 9979 case T_DOUBLE: { 9980 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9981 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9982 break; 9983 } 9984 default: assert(false, "%s", type2name(src1_elem_bt)); break; 9985 } 9986 %} 9987 ins_pipe( 
pipe_slow ); 9988 %} 9989 9990 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 9991 predicate(Matcher::vector_length(n) <= 32); 9992 match(Set dst (MaskAll src)); 9993 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 9994 ins_encode %{ 9995 int mask_len = Matcher::vector_length(this); 9996 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 9997 %} 9998 ins_pipe( pipe_slow ); 9999 %} 10000 10001 #ifdef _LP64 10002 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10003 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10004 match(Set dst (XorVMask src (MaskAll cnt))); 10005 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10006 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10007 ins_encode %{ 10008 uint masklen = Matcher::vector_length(this); 10009 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10010 %} 10011 ins_pipe( pipe_slow ); 10012 %} 10013 10014 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10015 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10016 (Matcher::vector_length(n) == 16) || 10017 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10018 match(Set dst (XorVMask src (MaskAll cnt))); 10019 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10020 ins_encode %{ 10021 uint masklen = Matcher::vector_length(this); 10022 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10023 %} 10024 ins_pipe( pipe_slow ); 10025 %} 10026 10027 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10028 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10029 match(Set dst (VectorLongToMask src)); 10030 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10031 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10032 ins_encode %{ 10033 int mask_len = Matcher::vector_length(this); 10034 int vec_enc = vector_length_encoding(mask_len); 10035 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10036 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10037 %} 10038 ins_pipe( pipe_slow ); 10039 %} 10040 10041 10042 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10043 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10044 match(Set dst (VectorLongToMask src)); 10045 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10046 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10047 ins_encode %{ 10048 int mask_len = Matcher::vector_length(this); 10049 assert(mask_len <= 32, "invalid mask length"); 10050 int vec_enc = vector_length_encoding(mask_len); 10051 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10052 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10053 %} 10054 ins_pipe( pipe_slow ); 10055 %} 10056 10057 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10058 predicate(n->bottom_type()->isa_vectmask()); 10059 match(Set dst (VectorLongToMask src)); 10060 format %{ "long_to_mask_evex $dst, $src\t!" 

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
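
// CastVV expands to nothing here: size(0), empty encoding, zero cost, with one
// flavor per operand register class (opmask, vector, legacy vector). The
// class-check patterns that follow implement IsInfiniteF and IsInfiniteD with
// VFPCLASSSS/VFPCLASSSD: the immediate 0x18 selects the +Infinity (0x08) and
// -Infinity (0x10) categories, and the resulting predicate bit is copied into
// a general purpose register with kmovbl.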

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
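
// For reference (assumed mapping; the intrinsic plumbing lives outside this
// file): a call such as Float.isInfinite(f) or Double.isInfinite(d) is
// expected to reach the matcher as an IsInfiniteF/IsInfiniteD node, which the
// patterns above lower to a single vfpclass test plus a kmovbl of the
// resulting predicate bit.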