//
// Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def" name ( register save type, C convention save type,
//                  ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers, or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX-enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
// For EVEX-enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
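//
// To illustrate how the entries below read (the annotation is added here for
// clarity only; the sample line mirrors the first definition that follows):
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//                   |    |      |     |         |
//                   |    |      |     |         +-- concrete VMReg handle for
//                   |    |      |     |             the first 32-bit slot
//                   |    |      |     +------------ encoding bit-pattern (0)
//                   |    |      +------------------ ideal register type
//                   |    +------------------------- C convention save type
//                   +------------------------------ register save type (SOC)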
//
// Linux ABI:   No registers preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
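// (Illustrative only, not part of the build: a helper predicate used by
// instruct predicates further below would typically be declared in this
// header section, e.g.
//   static bool supports_fast_vector_ops();   // hypothetical name
// and then defined in a later source %{ %} block, keeping the declaration
// next to the rules that use it.)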
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // Number of relocations needed by a call trampoline stub. 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(CodeBuffer &cbuf); 1191 static int emit_deopt_handler(CodeBuffer& cbuf); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // The exception handler starts out as a jump and can be patched to 1196 // a call by deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5-byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // The exception handler starts out as a jump and can be patched to 1211 // a call by deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch (bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1250 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1251 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1252 } 1253 1254 class Node::PD { 1255 public: 1256 enum NodeFlags { 1257 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1258 Flag_sets_carry_flag = Node::_last_flag << 2, 1259 Flag_sets_parity_flag = Node::_last_flag << 3, 1260 Flag_sets_zero_flag = Node::_last_flag << 4, 1261 Flag_sets_overflow_flag = Node::_last_flag << 5, 1262 Flag_sets_sign_flag = Node::_last_flag << 6, 1263 Flag_clears_carry_flag = Node::_last_flag << 7, 1264 Flag_clears_parity_flag = Node::_last_flag << 8, 1265
Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // Stuff framesize into a register and call a VM stub routine. 1309 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 C2_MacroAssembler _masm(&cbuf); 1314 address base = __ start_a_stub(size_exception_handler()); 1315 if (base == nullptr) { 1316 ciEnv::current()->record_failure("CodeCache is full"); 1317 return 0; // CodeBuffer::expand failed 1318 } 1319 int offset = __ offset(); 1320 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1321 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1322 __ end_a_stub(); 1323 return offset; 1324 } 1325 1326 // Emit deopt handler code. 1327 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1328 1329 // Note that the code buffer's insts_mark is always relative to insts. 1330 // That's why we must use the macroassembler to generate a handler. 1331 C2_MacroAssembler _masm(&cbuf); 1332 address base = __ start_a_stub(size_deopt_handler()); 1333 if (base == nullptr) { 1334 ciEnv::current()->record_failure("CodeCache is full"); 1335 return 0; // CodeBuffer::expand failed 1336 } 1337 int offset = __ offset(); 1338 1339 #ifdef _LP64 1340 address the_pc = (address) __ pc(); 1341 Label next; 1342 // push a "the_pc" on the stack without destroying any registers 1343 // as they all may be live. 
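// (Note: the call below pushes the address of the instruction that follows
// it, i.e. the_pc plus the size of the call; the subptr afterwards rewinds
// the value at [rsp] by the number of bytes emitted since the_pc, leaving
// exactly the_pc on the stack.)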
1344 1345 // push address of "next" 1346 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1347 __ bind(next); 1348 // adjust it so it matches "the_pc" 1349 __ subptr(Address(rsp, 0), __ offset() - offset); 1350 #else 1351 InternalAddress here(__ pc()); 1352 __ pushptr(here.addr(), noreg); 1353 #endif 1354 1355 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1356 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1357 __ end_a_stub(); 1358 return offset; 1359 } 1360 1361 Assembler::Width widthForType(BasicType bt) { 1362 if (bt == T_BYTE) { 1363 return Assembler::B; 1364 } else if (bt == T_SHORT) { 1365 return Assembler::W; 1366 } else if (bt == T_INT) { 1367 return Assembler::D; 1368 } else { 1369 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1370 return Assembler::Q; 1371 } 1372 } 1373 1374 //============================================================================= 1375 1376 // Float masks come from different places depending on platform. 1377 #ifdef _LP64 1378 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1379 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1380 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1381 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1382 #else 1383 static address float_signmask() { return (address)float_signmask_pool; } 1384 static address float_signflip() { return (address)float_signflip_pool; } 1385 static address double_signmask() { return (address)double_signmask_pool; } 1386 static address double_signflip() { return (address)double_signflip_pool; } 1387 #endif 1388 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1389 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1390 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1391 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1392 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1393 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1394 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1395 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1396 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1397 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1398 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1399 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1400 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1401 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1402 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1403 1404 //============================================================================= 1405 bool Matcher::match_rule_supported(int opcode) { 1406 if (!has_match_rule(opcode)) { 1407 return false; // no match rule present 1408 } 1409 const bool is_LP64 = LP64_ONLY(true) 
NOT_LP64(false); 1410 switch (opcode) { 1411 case Op_AbsVL: 1412 case Op_StoreVectorScatter: 1413 if (UseAVX < 3) { 1414 return false; 1415 } 1416 break; 1417 case Op_PopCountI: 1418 case Op_PopCountL: 1419 if (!UsePopCountInstruction) { 1420 return false; 1421 } 1422 break; 1423 case Op_PopCountVI: 1424 if (UseAVX < 2) { 1425 return false; 1426 } 1427 break; 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 if (!VM_Version::supports_cx8()) { 1514 return false; 1515 } 1516 break; 1517 case Op_StrIndexOf: 1518 if (!UseSSE42Intrinsics) { 1519 return false; 1520 } 1521 break; 1522 case Op_StrIndexOfChar: 1523 if (!UseSSE42Intrinsics) { 1524 return false; 1525 } 1526 break; 1527 case Op_OnSpinWait: 1528 if (VM_Version::supports_on_spin_wait() == false) { 1529 return false; 1530 } 1531 break; 1532 case Op_MulVB: 1533 case Op_LShiftVB: 1534 case Op_RShiftVB: 1535 case Op_URShiftVB: 1536 case Op_VectorInsert: 1537 case Op_VectorLoadMask: 1538 case Op_VectorStoreMask: 1539 case Op_VectorBlend: 1540 if (UseSSE < 4) { 1541 return false; 1542 } 1543 break; 1544 #ifdef _LP64 1545 case Op_MaxD: 1546 case Op_MaxF: 1547 case Op_MinD: 1548 case Op_MinF: 1549 if (UseAVX < 1) { // enabled for AVX only 1550 return false; 1551 } 1552 break; 1553 #endif 1554 case Op_CacheWB: 1555 case Op_CacheWBPreSync: 1556 case Op_CacheWBPostSync: 1557 if (!VM_Version::supports_data_cache_line_flush()) { 1558 return false; 1559 } 1560 break; 1561 case Op_ExtractB: 1562 case Op_ExtractL: 1563 case Op_ExtractI: 1564 case Op_RoundDoubleMode: 1565 if (UseSSE < 4) { 1566 return false; 1567 } 1568 break; 1569 case Op_RoundDoubleModeV: 
1570 if (VM_Version::supports_avx() == false) { 1571 return false; // 128bit vroundpd is not available 1572 } 1573 break; 1574 case Op_LoadVectorGather: 1575 if (UseAVX < 2) { 1576 return false; 1577 } 1578 break; 1579 case Op_FmaF: 1580 case Op_FmaD: 1581 case Op_FmaVD: 1582 case Op_FmaVF: 1583 if (!UseFMA) { 1584 return false; 1585 } 1586 break; 1587 case Op_MacroLogicV: 1588 if (UseAVX < 3 || !UseVectorMacroLogic) { 1589 return false; 1590 } 1591 break; 1592 1593 case Op_VectorCmpMasked: 1594 case Op_VectorMaskGen: 1595 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1596 return false; 1597 } 1598 break; 1599 case Op_VectorMaskFirstTrue: 1600 case Op_VectorMaskLastTrue: 1601 case Op_VectorMaskTrueCount: 1602 case Op_VectorMaskToLong: 1603 if (!is_LP64 || UseAVX < 1) { 1604 return false; 1605 } 1606 break; 1607 case Op_RoundF: 1608 case Op_RoundD: 1609 if (!is_LP64) { 1610 return false; 1611 } 1612 break; 1613 case Op_CopySignD: 1614 case Op_CopySignF: 1615 if (UseAVX < 3 || !is_LP64) { 1616 return false; 1617 } 1618 if (!VM_Version::supports_avx512vl()) { 1619 return false; 1620 } 1621 break; 1622 #ifndef _LP64 1623 case Op_AddReductionVF: 1624 case Op_AddReductionVD: 1625 case Op_MulReductionVF: 1626 case Op_MulReductionVD: 1627 if (UseSSE < 1) { // requires at least SSE 1628 return false; 1629 } 1630 break; 1631 case Op_MulAddVS2VI: 1632 case Op_RShiftVL: 1633 case Op_AbsVD: 1634 case Op_NegVD: 1635 if (UseSSE < 2) { 1636 return false; 1637 } 1638 break; 1639 #endif // !LP64 1640 case Op_CompressBits: 1641 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1642 return false; 1643 } 1644 break; 1645 case Op_ExpandBits: 1646 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1647 return false; 1648 } 1649 break; 1650 case Op_SignumF: 1651 if (UseSSE < 1) { 1652 return false; 1653 } 1654 break; 1655 case Op_SignumD: 1656 if (UseSSE < 2) { 1657 return false; 1658 } 1659 break; 1660 case Op_CompressM: 1661 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1662 return false; 1663 } 1664 break; 1665 case Op_CompressV: 1666 case Op_ExpandV: 1667 if (!VM_Version::supports_avx512vl()) { 1668 return false; 1669 } 1670 break; 1671 case Op_SqrtF: 1672 if (UseSSE < 1) { 1673 return false; 1674 } 1675 break; 1676 case Op_SqrtD: 1677 #ifdef _LP64 1678 if (UseSSE < 2) { 1679 return false; 1680 } 1681 #else 1682 // x86_32.ad has a special match rule for SqrtD. 1683 // Together with common x86 rules, this handles all UseSSE cases. 1684 #endif 1685 break; 1686 case Op_ConvF2HF: 1687 case Op_ConvHF2F: 1688 if (!VM_Version::supports_float16()) { 1689 return false; 1690 } 1691 break; 1692 case Op_VectorCastF2HF: 1693 case Op_VectorCastHF2F: 1694 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1695 return false; 1696 } 1697 break; 1698 } 1699 return true; // Match rules are supported by default. 
1700 } 1701 1702 //------------------------------------------------------------------------ 1703 1704 static inline bool is_pop_count_instr_target(BasicType bt) { 1705 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1706 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1707 } 1708 1709 bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { 1710 return match_rule_supported_vector(opcode, vlen, bt); 1711 } 1712 1713 // Identify extra cases that we might want to provide match rules for vector nodes and 1714 // other intrinsics guarded with vector length (vlen) and element type (bt). 1715 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1716 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1717 if (!match_rule_supported(opcode)) { 1718 return false; 1719 } 1720 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1721 // * SSE2 supports 128bit vectors for all types; 1722 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1723 // * AVX2 supports 256bit vectors for all types; 1724 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1725 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1726 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1727 // And MaxVectorSize is taken into account as well. 1728 if (!vector_size_supported(bt, vlen)) { 1729 return false; 1730 } 1731 // Special cases which require vector length follow: 1732 // * implementation limitations 1733 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1734 // * 128bit vroundpd instruction is present only in AVX1 1735 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1736 switch (opcode) { 1737 case Op_AbsVF: 1738 case Op_NegVF: 1739 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1740 return false; // 512bit vandps and vxorps are not available 1741 } 1742 break; 1743 case Op_AbsVD: 1744 case Op_NegVD: 1745 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1746 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1747 } 1748 break; 1749 case Op_RotateRightV: 1750 case Op_RotateLeftV: 1751 if (bt != T_INT && bt != T_LONG) { 1752 return false; 1753 } // fallthrough 1754 case Op_MacroLogicV: 1755 if (!VM_Version::supports_evex() || 1756 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1757 return false; 1758 } 1759 break; 1760 case Op_ClearArray: 1761 case Op_VectorMaskGen: 1762 case Op_VectorCmpMasked: 1763 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1764 return false; 1765 } 1766 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1767 return false; 1768 } 1769 break; 1770 case Op_LoadVectorMasked: 1771 case Op_StoreVectorMasked: 1772 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1773 return false; 1774 } 1775 break; 1776 case Op_MaxV: 1777 case Op_MinV: 1778 if (UseSSE < 4 && is_integral_type(bt)) { 1779 return false; 1780 } 1781 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1782 // Float/Double intrinsics are enabled for AVX family currently. 
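// (Any AVX level satisfies the 128/256-bit cases checked below; the 512-bit
// forms are additionally gated on AVX512DQ.)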
1783 if (UseAVX == 0) { 1784 return false; 1785 } 1786 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1787 return false; 1788 } 1789 } 1790 break; 1791 case Op_CallLeafVector: 1792 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1793 return false; 1794 } 1795 break; 1796 case Op_AddReductionVI: 1797 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1798 return false; 1799 } 1800 // fallthrough 1801 case Op_AndReductionV: 1802 case Op_OrReductionV: 1803 case Op_XorReductionV: 1804 if (is_subword_type(bt) && (UseSSE < 4)) { 1805 return false; 1806 } 1807 #ifndef _LP64 1808 if (bt == T_BYTE || bt == T_LONG) { 1809 return false; 1810 } 1811 #endif 1812 break; 1813 #ifndef _LP64 1814 case Op_VectorInsert: 1815 if (bt == T_LONG || bt == T_DOUBLE) { 1816 return false; 1817 } 1818 break; 1819 #endif 1820 case Op_MinReductionV: 1821 case Op_MaxReductionV: 1822 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1823 return false; 1824 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1825 return false; 1826 } 1827 // Float/Double intrinsics enabled for AVX family. 1828 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1829 return false; 1830 } 1831 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1832 return false; 1833 } 1834 #ifndef _LP64 1835 if (bt == T_BYTE || bt == T_LONG) { 1836 return false; 1837 } 1838 #endif 1839 break; 1840 case Op_VectorTest: 1841 if (UseSSE < 4) { 1842 return false; // Implementation limitation 1843 } else if (size_in_bits < 32) { 1844 return false; // Implementation limitation 1845 } 1846 break; 1847 case Op_VectorLoadShuffle: 1848 case Op_VectorRearrange: 1849 if(vlen == 2) { 1850 return false; // Implementation limitation due to how shuffle is loaded 1851 } else if (size_in_bits == 256 && UseAVX < 2) { 1852 return false; // Implementation limitation 1853 } 1854 break; 1855 case Op_VectorLoadMask: 1856 case Op_VectorMaskCast: 1857 if (size_in_bits == 256 && UseAVX < 2) { 1858 return false; // Implementation limitation 1859 } 1860 // fallthrough 1861 case Op_VectorStoreMask: 1862 if (vlen == 2) { 1863 return false; // Implementation limitation 1864 } 1865 break; 1866 case Op_PopulateIndex: 1867 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1868 return false; 1869 } 1870 break; 1871 case Op_VectorCastB2X: 1872 case Op_VectorCastS2X: 1873 case Op_VectorCastI2X: 1874 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1875 return false; 1876 } 1877 break; 1878 case Op_VectorCastL2X: 1879 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1880 return false; 1881 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1882 return false; 1883 } 1884 break; 1885 case Op_VectorCastF2X: { 1886 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1887 // happen after intermediate conversion to integer and special handling 1888 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
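// (The cast source here is always a float vector, so the 256-bit check is
// computed from T_FLOAT's element size rather than from the destination
// type bt.)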
1889 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1890 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1891 return false; 1892 } 1893 } 1894 // fallthrough 1895 case Op_VectorCastD2X: 1896 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1897 return false; 1898 } 1899 break; 1900 case Op_VectorCastF2HF: 1901 case Op_VectorCastHF2F: 1902 if (!VM_Version::supports_f16c() && 1903 ((!VM_Version::supports_evex() || 1904 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1905 return false; 1906 } 1907 break; 1908 case Op_RoundVD: 1909 if (!VM_Version::supports_avx512dq()) { 1910 return false; 1911 } 1912 break; 1913 case Op_MulReductionVI: 1914 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1915 return false; 1916 } 1917 break; 1918 case Op_LoadVectorGatherMasked: 1919 case Op_StoreVectorScatterMasked: 1920 case Op_StoreVectorScatter: 1921 if (is_subword_type(bt)) { 1922 return false; 1923 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1924 return false; 1925 } 1926 // fallthrough 1927 case Op_LoadVectorGather: 1928 if (size_in_bits == 64) { 1929 return false; 1930 } 1931 break; 1932 case Op_MaskAll: 1933 if (!VM_Version::supports_evex()) { 1934 return false; 1935 } 1936 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1937 return false; 1938 } 1939 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1940 return false; 1941 } 1942 break; 1943 case Op_VectorMaskCmp: 1944 if (vlen < 2 || size_in_bits < 32) { 1945 return false; 1946 } 1947 break; 1948 case Op_CompressM: 1949 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1950 return false; 1951 } 1952 break; 1953 case Op_CompressV: 1954 case Op_ExpandV: 1955 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1956 return false; 1957 } 1958 if (size_in_bits < 128) { 1959 return false; 1960 } 1961 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1962 return false; 1963 } 1964 break; 1965 case Op_VectorLongToMask: 1966 if (UseAVX < 1 || !is_LP64) { 1967 return false; 1968 } 1969 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1970 return false; 1971 } 1972 break; 1973 case Op_SignumVD: 1974 case Op_SignumVF: 1975 if (UseAVX < 1) { 1976 return false; 1977 } 1978 break; 1979 case Op_PopCountVI: 1980 case Op_PopCountVL: { 1981 if (!is_pop_count_instr_target(bt) && 1982 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1983 return false; 1984 } 1985 } 1986 break; 1987 case Op_ReverseV: 1988 case Op_ReverseBytesV: 1989 if (UseAVX < 2) { 1990 return false; 1991 } 1992 break; 1993 case Op_CountTrailingZerosV: 1994 case Op_CountLeadingZerosV: 1995 if (UseAVX < 2) { 1996 return false; 1997 } 1998 break; 1999 } 2000 return true; // By default, match rules are supported. 2001 } 2002 2003 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2004 // The ADLC-based match_rule_supported routine checks for the existence of a pattern based 2005 // on the IR opcode. Most unary/binary/ternary masked operations share the IR nodes 2006 // of their non-masked counterparts, with the mask edge being the differentiator. 2007 // This routine does a strict check on the existence of masked operation patterns, 2008 // returning false for all opcodes other than the 2009 // ones whose masked instruction patterns are defined in this file.
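// For example (illustrative): a masked byte add is the same AddVB node with
// an additional mask input; the checks below accept it only when AVX512BW
// provides the k-register-masked vpaddb form (plus AVX512VL for vectors
// shorter than 512 bits).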
2010 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2011 return false; 2012 } 2013 2014 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2015 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2016 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2017 return false; 2018 } 2019 switch(opcode) { 2020 // Unary masked operations 2021 case Op_AbsVB: 2022 case Op_AbsVS: 2023 if(!VM_Version::supports_avx512bw()) { 2024 return false; // Implementation limitation 2025 } 2026 case Op_AbsVI: 2027 case Op_AbsVL: 2028 return true; 2029 2030 // Ternary masked operations 2031 case Op_FmaVF: 2032 case Op_FmaVD: 2033 return true; 2034 2035 case Op_MacroLogicV: 2036 if(bt != T_INT && bt != T_LONG) { 2037 return false; 2038 } 2039 return true; 2040 2041 // Binary masked operations 2042 case Op_AddVB: 2043 case Op_AddVS: 2044 case Op_SubVB: 2045 case Op_SubVS: 2046 case Op_MulVS: 2047 case Op_LShiftVS: 2048 case Op_RShiftVS: 2049 case Op_URShiftVS: 2050 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2051 if (!VM_Version::supports_avx512bw()) { 2052 return false; // Implementation limitation 2053 } 2054 return true; 2055 2056 case Op_MulVL: 2057 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2058 if (!VM_Version::supports_avx512dq()) { 2059 return false; // Implementation limitation 2060 } 2061 return true; 2062 2063 case Op_AndV: 2064 case Op_OrV: 2065 case Op_XorV: 2066 case Op_RotateRightV: 2067 case Op_RotateLeftV: 2068 if (bt != T_INT && bt != T_LONG) { 2069 return false; // Implementation limitation 2070 } 2071 return true; 2072 2073 case Op_VectorLoadMask: 2074 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2075 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2076 return false; 2077 } 2078 return true; 2079 2080 case Op_AddVI: 2081 case Op_AddVL: 2082 case Op_AddVF: 2083 case Op_AddVD: 2084 case Op_SubVI: 2085 case Op_SubVL: 2086 case Op_SubVF: 2087 case Op_SubVD: 2088 case Op_MulVI: 2089 case Op_MulVF: 2090 case Op_MulVD: 2091 case Op_DivVF: 2092 case Op_DivVD: 2093 case Op_SqrtVF: 2094 case Op_SqrtVD: 2095 case Op_LShiftVI: 2096 case Op_LShiftVL: 2097 case Op_RShiftVI: 2098 case Op_RShiftVL: 2099 case Op_URShiftVI: 2100 case Op_URShiftVL: 2101 case Op_LoadVectorMasked: 2102 case Op_StoreVectorMasked: 2103 case Op_LoadVectorGatherMasked: 2104 case Op_StoreVectorScatterMasked: 2105 return true; 2106 2107 case Op_MaxV: 2108 case Op_MinV: 2109 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2110 return false; // Implementation limitation 2111 } 2112 if (is_floating_point_type(bt)) { 2113 return false; // Implementation limitation 2114 } 2115 return true; 2116 2117 case Op_VectorMaskCmp: 2118 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2119 return false; // Implementation limitation 2120 } 2121 return true; 2122 2123 case Op_VectorRearrange: 2124 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2125 return false; // Implementation limitation 2126 } 2127 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2128 return false; // Implementation limitation 2129 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2130 return false; // Implementation limitation 2131 } 2132 return true; 2133 2134 // Binary Logical operations 2135 case Op_AndVMask: 2136 case Op_OrVMask: 2137 case Op_XorVMask: 2138 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2139 return false; // Implementation limitation 2140 } 2141 return true; 2142 2143 case 
Op_PopCountVI: 2144 case Op_PopCountVL: 2145 if (!is_pop_count_instr_target(bt)) { 2146 return false; 2147 } 2148 return true; 2149 2150 case Op_MaskAll: 2151 return true; 2152 2153 case Op_CountLeadingZerosV: 2154 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2155 return true; 2156 } 2157 default: 2158 return false; 2159 } 2160 } 2161 2162 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2163 return false; 2164 } 2165 2166 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2167 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2168 bool legacy = (generic_opnd->opcode() == LEGVEC); 2169 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2170 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2171 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2172 return new legVecZOper(); 2173 } 2174 if (legacy) { 2175 switch (ideal_reg) { 2176 case Op_VecS: return new legVecSOper(); 2177 case Op_VecD: return new legVecDOper(); 2178 case Op_VecX: return new legVecXOper(); 2179 case Op_VecY: return new legVecYOper(); 2180 case Op_VecZ: return new legVecZOper(); 2181 } 2182 } else { 2183 switch (ideal_reg) { 2184 case Op_VecS: return new vecSOper(); 2185 case Op_VecD: return new vecDOper(); 2186 case Op_VecX: return new vecXOper(); 2187 case Op_VecY: return new vecYOper(); 2188 case Op_VecZ: return new vecZOper(); 2189 } 2190 } 2191 ShouldNotReachHere(); 2192 return nullptr; 2193 } 2194 2195 bool Matcher::is_reg2reg_move(MachNode* m) { 2196 switch (m->rule()) { 2197 case MoveVec2Leg_rule: 2198 case MoveLeg2Vec_rule: 2199 case MoveF2VL_rule: 2200 case MoveF2LEG_rule: 2201 case MoveVL2F_rule: 2202 case MoveLEG2F_rule: 2203 case MoveD2VL_rule: 2204 case MoveD2LEG_rule: 2205 case MoveVL2D_rule: 2206 case MoveLEG2D_rule: 2207 return true; 2208 default: 2209 return false; 2210 } 2211 } 2212 2213 bool Matcher::is_generic_vector(MachOper* opnd) { 2214 switch (opnd->opcode()) { 2215 case VEC: 2216 case LEGVEC: 2217 return true; 2218 default: 2219 return false; 2220 } 2221 } 2222 2223 //------------------------------------------------------------------------ 2224 2225 const RegMask* Matcher::predicate_reg_mask(void) { 2226 return &_VECTMASK_REG_mask; 2227 } 2228 2229 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2230 return new TypeVectMask(elemTy, length); 2231 } 2232 2233 // Max vector size in bytes. 0 if not supported. 2234 int Matcher::vector_width_in_bytes(BasicType bt) { 2235 assert(is_java_primitive(bt), "only primitive type vectors"); 2236 if (UseSSE < 2) return 0; 2237 // SSE2 supports 128bit vectors for all types. 2238 // AVX2 supports 256bit vectors for all types. 2239 // EVEX (AVX512) supports 512bit vectors for all types. 2240 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2241 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2242 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2243 size = (UseAVX > 2) ? 64 : 32; 2244 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2245 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2246 // Use flag to limit vector size. 2247 size = MIN2(size, (int)MaxVectorSize); 2248 // Minimum 2 values in vector (or 4 for bytes).
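// (Worked example, assuming UseAVX == 2 and MaxVectorSize == 32: size is
// (1 << 2) * 8 == 32 for every type, so T_INT vectors hold 8 elements and
// the minimum checks below always pass; with UseSSE == 2 and UseAVX == 0,
// size is 16 and they also pass, e.g. 2 longs or 16 bytes.)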
2249 switch (bt) { 2250 case T_DOUBLE: 2251 case T_LONG: 2252 if (size < 16) return 0; 2253 break; 2254 case T_FLOAT: 2255 case T_INT: 2256 if (size < 8) return 0; 2257 break; 2258 case T_BOOLEAN: 2259 if (size < 4) return 0; 2260 break; 2261 case T_CHAR: 2262 if (size < 4) return 0; 2263 break; 2264 case T_BYTE: 2265 if (size < 4) return 0; 2266 break; 2267 case T_SHORT: 2268 if (size < 4) return 0; 2269 break; 2270 default: 2271 ShouldNotReachHere(); 2272 } 2273 return size; 2274 } 2275 2276 // Limits on vector size (number of elements) loaded into vector. 2277 int Matcher::max_vector_size(const BasicType bt) { 2278 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2279 } 2280 int Matcher::min_vector_size(const BasicType bt) { 2281 int max_size = max_vector_size(bt); 2282 // Min size which can be loaded into vector is 4 bytes. 2283 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2284 // Support for calling svml double64 vectors 2285 if (bt == T_DOUBLE) { 2286 size = 1; 2287 } 2288 return MIN2(size,max_size); 2289 } 2290 2291 int Matcher::superword_max_vector_size(const BasicType bt) { 2292 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2293 // by default on Cascade Lake 2294 if (VM_Version::is_default_intel_cascade_lake()) { 2295 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2296 } 2297 return Matcher::max_vector_size(bt); 2298 } 2299 2300 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2301 return -1; 2302 } 2303 2304 // Vector ideal reg corresponding to specified size in bytes 2305 uint Matcher::vector_ideal_reg(int size) { 2306 assert(MaxVectorSize >= size, ""); 2307 switch(size) { 2308 case 4: return Op_VecS; 2309 case 8: return Op_VecD; 2310 case 16: return Op_VecX; 2311 case 32: return Op_VecY; 2312 case 64: return Op_VecZ; 2313 } 2314 ShouldNotReachHere(); 2315 return 0; 2316 } 2317 2318 // Check for shift by small constant as well 2319 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2320 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2321 shift->in(2)->get_int() <= 3 && 2322 // Are there other uses besides address expressions? 2323 !matcher->is_visited(shift)) { 2324 address_visited.set(shift->_idx); // Flag as address_visited 2325 mstack.push(shift->in(2), Matcher::Visit); 2326 Node *conv = shift->in(1); 2327 #ifdef _LP64 2328 // Allow Matcher to match the rule which bypass 2329 // ConvI2L operation for an array index on LP64 2330 // if the index value is positive. 2331 if (conv->Opcode() == Op_ConvI2L && 2332 conv->as_Type()->type()->is_long()->_lo >= 0 && 2333 // Are there other uses besides address expressions? 2334 !matcher->is_visited(conv)) { 2335 address_visited.set(conv->_idx); // Flag as address_visited 2336 mstack.push(conv->in(1), Matcher::Pre_Visit); 2337 } else 2338 #endif 2339 mstack.push(conv, Matcher::Pre_Visit); 2340 return true; 2341 } 2342 return false; 2343 } 2344 2345 // This function identifies sub-graphs in which a 'load' node is 2346 // input to two different nodes, and such that it can be matched 2347 // with BMI instructions like blsi, blsr, etc. 2348 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2349 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2350 // refers to the same node. 2351 // 2352 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2353 // This is a temporary solution until we make DAGs expressible in ADL. 
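// Usage sketch (mirrors is_bmi_pattern() below): for the blsi case,
//   FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
//   bmii.match(Op_AndI, -1, Op_SubI, 1, 0);
// recognizes (AndI (SubI 0 load) load). The '-1' marks AndI as commutative
// (its two inputs may come in either order), while '1' pins the constant,
// whose value must be 0, to input 1 of the SubI.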
2354 template<typename ConType> 2355 class FusedPatternMatcher { 2356 Node* _op1_node; 2357 Node* _mop_node; 2358 int _con_op; 2359 2360 static int match_next(Node* n, int next_op, int next_op_idx) { 2361 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2362 return -1; 2363 } 2364 2365 if (next_op_idx == -1) { // n is commutative, try rotations 2366 if (n->in(1)->Opcode() == next_op) { 2367 return 1; 2368 } else if (n->in(2)->Opcode() == next_op) { 2369 return 2; 2370 } 2371 } else { 2372 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2373 if (n->in(next_op_idx)->Opcode() == next_op) { 2374 return next_op_idx; 2375 } 2376 } 2377 return -1; 2378 } 2379 2380 public: 2381 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2382 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2383 2384 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2385 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2386 typename ConType::NativeType con_value) { 2387 if (_op1_node->Opcode() != op1) { 2388 return false; 2389 } 2390 if (_mop_node->outcnt() > 2) { 2391 return false; 2392 } 2393 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2394 if (op1_op2_idx == -1) { 2395 return false; 2396 } 2397 // Memory operation must be the other edge 2398 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2399 2400 // Check that the mop node is really what we want 2401 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2402 Node* op2_node = _op1_node->in(op1_op2_idx); 2403 if (op2_node->outcnt() > 1) { 2404 return false; 2405 } 2406 assert(op2_node->Opcode() == op2, "Should be"); 2407 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2408 if (op2_con_idx == -1) { 2409 return false; 2410 } 2411 // Memory operation must be the other edge 2412 int op2_mop_idx = (op2_con_idx & 1) + 1; 2413 // Check that the memory operation is the same node 2414 if (op2_node->in(op2_mop_idx) == _mop_node) { 2415 // Now check the constant 2416 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2417 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2418 return true; 2419 } 2420 } 2421 } 2422 return false; 2423 } 2424 }; 2425 2426 static bool is_bmi_pattern(Node* n, Node* m) { 2427 assert(UseBMI1Instructions, "sanity"); 2428 if (n != nullptr && m != nullptr) { 2429 if (m->Opcode() == Op_LoadI) { 2430 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2431 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2432 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2433 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2434 } else if (m->Opcode() == Op_LoadL) { 2435 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2436 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2437 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2438 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2439 } 2440 } 2441 return false; 2442 } 2443 2444 // Should the matcher clone input 'm' of node 'n'? 2445 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2446 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
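// (Cloning the shared load lets each user match it as a memory operand, so
// the fused BMI form, e.g. blsi r32, m32, can be selected instead of first
// materializing the load in a register.)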
2447 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2448 mstack.push(m, Visit); 2449 return true; 2450 } 2451 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2452 mstack.push(m, Visit); // m = ShiftCntV 2453 return true; 2454 } 2455 return false; 2456 } 2457 2458 // Should the Matcher clone shifts on addressing modes, expecting them 2459 // to be subsumed into complex addressing expressions or compute them 2460 // into registers? 2461 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2462 Node *off = m->in(AddPNode::Offset); 2463 if (off->is_Con()) { 2464 address_visited.test_set(m->_idx); // Flag as address_visited 2465 Node *adr = m->in(AddPNode::Address); 2466 2467 // Intel can handle 2 adds in addressing mode 2468 // AtomicAdd is not an addressing expression. 2469 // Cheap to find it by looking for screwy base. 2470 if (adr->is_AddP() && 2471 !adr->in(AddPNode::Base)->is_top() && 2472 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2473 // Are there other uses besides address expressions? 2474 !is_visited(adr)) { 2475 address_visited.set(adr->_idx); // Flag as address_visited 2476 Node *shift = adr->in(AddPNode::Offset); 2477 if (!clone_shift(shift, this, mstack, address_visited)) { 2478 mstack.push(shift, Pre_Visit); 2479 } 2480 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2481 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2482 } else { 2483 mstack.push(adr, Pre_Visit); 2484 } 2485 2486 // Clone X+offset as it also folds into most addressing expressions 2487 mstack.push(off, Visit); 2488 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2489 return true; 2490 } else if (clone_shift(off, this, mstack, address_visited)) { 2491 address_visited.test_set(m->_idx); // Flag as address_visited 2492 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2493 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2494 return true; 2495 } 2496 return false; 2497 } 2498 2499 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2500 switch (bt) { 2501 case BoolTest::eq: 2502 return Assembler::eq; 2503 case BoolTest::ne: 2504 return Assembler::neq; 2505 case BoolTest::le: 2506 case BoolTest::ule: 2507 return Assembler::le; 2508 case BoolTest::ge: 2509 case BoolTest::uge: 2510 return Assembler::nlt; 2511 case BoolTest::lt: 2512 case BoolTest::ult: 2513 return Assembler::lt; 2514 case BoolTest::gt: 2515 case BoolTest::ugt: 2516 return Assembler::nle; 2517 default : ShouldNotReachHere(); return Assembler::_false; 2518 } 2519 } 2520 2521 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2522 switch (bt) { 2523 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2524 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2525 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2526 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2527 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2528 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2529 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2530 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2531 } 2532 } 2533 2534 // Helper methods for MachSpillCopyNode::implementation(). 
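// (Background: on AVX512F-only targets without AVX512VL, e.g. KNL, the
// 128/256-bit EVEX-encoded moves needed for xmm16-xmm31 are unavailable, so
// the helpers below fall back to vextractf32x4/vinsertf32x4 and their 64x4
// variants, which AVX512F provides on the full ZMM register.)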
2535 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2536 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2537 assert(ireg == Op_VecS || // 32bit vector 2538 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2539 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2540 "no non-adjacent vector moves" ); 2541 if (cbuf) { 2542 C2_MacroAssembler _masm(cbuf); 2543 switch (ireg) { 2544 case Op_VecS: // copy whole register 2545 case Op_VecD: 2546 case Op_VecX: 2547 #ifndef _LP64 2548 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2549 #else 2550 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2551 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2552 } else { 2553 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2554 } 2555 #endif 2556 break; 2557 case Op_VecY: 2558 #ifndef _LP64 2559 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2560 #else 2561 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2562 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2563 } else { 2564 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2565 } 2566 #endif 2567 break; 2568 case Op_VecZ: 2569 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2570 break; 2571 default: 2572 ShouldNotReachHere(); 2573 } 2574 #ifndef PRODUCT 2575 } else { 2576 switch (ireg) { 2577 case Op_VecS: 2578 case Op_VecD: 2579 case Op_VecX: 2580 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2581 break; 2582 case Op_VecY: 2583 case Op_VecZ: 2584 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2585 break; 2586 default: 2587 ShouldNotReachHere(); 2588 } 2589 #endif 2590 } 2591 } 2592 2593 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2594 int stack_offset, int reg, uint ireg, outputStream* st) { 2595 if (cbuf) { 2596 C2_MacroAssembler _masm(cbuf); 2597 if (is_load) { 2598 switch (ireg) { 2599 case Op_VecS: 2600 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2601 break; 2602 case Op_VecD: 2603 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2604 break; 2605 case Op_VecX: 2606 #ifndef _LP64 2607 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2608 #else 2609 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2610 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2611 } else { 2612 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2613 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2614 } 2615 #endif 2616 break; 2617 case Op_VecY: 2618 #ifndef _LP64 2619 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2620 #else 2621 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2622 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2623 } else { 2624 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
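// zero the whole destination register first, then insert the 256-bit
// value from the stack into its low half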
2625 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2626 } 2627 #endif 2628 break; 2629 case Op_VecZ: 2630 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2631 break; 2632 default: 2633 ShouldNotReachHere(); 2634 } 2635 } else { // store 2636 switch (ireg) { 2637 case Op_VecS: 2638 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2639 break; 2640 case Op_VecD: 2641 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2642 break; 2643 case Op_VecX: 2644 #ifndef _LP64 2645 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2646 #else 2647 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2648 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2649 } 2650 else { 2651 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2652 } 2653 #endif 2654 break; 2655 case Op_VecY: 2656 #ifndef _LP64 2657 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2658 #else 2659 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2660 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2661 } 2662 else { 2663 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2664 } 2665 #endif 2666 break; 2667 case Op_VecZ: 2668 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2669 break; 2670 default: 2671 ShouldNotReachHere(); 2672 } 2673 } 2674 #ifndef PRODUCT 2675 } else { 2676 if (is_load) { 2677 switch (ireg) { 2678 case Op_VecS: 2679 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2680 break; 2681 case Op_VecD: 2682 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2683 break; 2684 case Op_VecX: 2685 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2686 break; 2687 case Op_VecY: 2688 case Op_VecZ: 2689 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2690 break; 2691 default: 2692 ShouldNotReachHere(); 2693 } 2694 } else { // store 2695 switch (ireg) { 2696 case Op_VecS: 2697 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2698 break; 2699 case Op_VecD: 2700 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2701 break; 2702 case Op_VecX: 2703 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2704 break; 2705 case Op_VecY: 2706 case Op_VecZ: 2707 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2708 break; 2709 default: 2710 ShouldNotReachHere(); 2711 } 2712 } 2713 #endif 2714 } 2715 } 2716 2717 template <class T> 2718 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2719 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2720 jvalue ele; 2721 switch (bt) { 2722 case T_BYTE: ele.b = con; break; 2723 case T_SHORT: ele.s = con; break; 2724 case T_INT: ele.i = con; break; 2725 case T_LONG: ele.j = con; break; 2726 case T_FLOAT: ele.f = con; break; 2727 case T_DOUBLE: ele.d = con; break; 2728 default: ShouldNotReachHere(); 2729 } 2730 for (int i = 0; i < len; i++) { 2731 val->append(ele); 2732 } 2733 return val; 2734 } 2735 2736 static inline jlong high_bit_set(BasicType bt) { 2737 switch (bt) { 2738 case T_BYTE: 
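// sign bit of every 8-bit lane, replicated through the 64-bit value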
return 0x8080808080808080; 2739 case T_SHORT: return 0x8000800080008000; 2740 case T_INT: return 0x8000000080000000; 2741 case T_LONG: return 0x8000000000000000; 2742 default: 2743 ShouldNotReachHere(); 2744 return 0; 2745 } 2746 } 2747 2748 #ifndef PRODUCT 2749 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2750 st->print("nop \t# %d bytes pad for loops and calls", _count); 2751 } 2752 #endif 2753 2754 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2755 C2_MacroAssembler _masm(&cbuf); 2756 __ nop(_count); 2757 } 2758 2759 uint MachNopNode::size(PhaseRegAlloc*) const { 2760 return _count; 2761 } 2762 2763 #ifndef PRODUCT 2764 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2765 st->print("# breakpoint"); 2766 } 2767 #endif 2768 2769 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2770 C2_MacroAssembler _masm(&cbuf); 2771 __ int3(); 2772 } 2773 2774 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2775 return MachNode::size(ra_); 2776 } 2777 2778 %} 2779 2780 encode %{ 2781 2782 enc_class call_epilog %{ 2783 C2_MacroAssembler _masm(&cbuf); 2784 if (VerifyStackAtCalls) { 2785 // Check that stack depth is unchanged: find majik cookie on stack 2786 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2787 Label L; 2788 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2789 __ jccb(Assembler::equal, L); 2790 // Die if stack mismatch 2791 __ int3(); 2792 __ bind(L); 2793 } 2794 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2795 C2_MacroAssembler _masm(&cbuf); 2796 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2797 // Search for the corresponding projection, get the register and emit code that initialized it. 2798 uint con = (tf()->range_cc()->cnt() - 1); 2799 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2800 ProjNode* proj = fast_out(i)->as_Proj(); 2801 if (proj->_con == con) { 2802 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2803 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2804 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2805 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2806 __ testq(rax, rax); 2807 __ setb(Assembler::notZero, toReg); 2808 __ movzbl(toReg, toReg); 2809 if (reg->is_stack()) { 2810 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2811 __ movq(Address(rsp, st_off), toReg); 2812 } 2813 break; 2814 } 2815 } 2816 if (return_value_is_used()) { 2817 // An inline type is returned as fields in multiple registers. 2818 // Rax either contains an oop if the inline type is buffered or a pointer 2819 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2820 // if the lowest bit is set to allow C2 to use the oop after null checking. 
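// Branch-free clearing: when the tag bit is set, (rax & 1) - 1 is 0 and
// the and below zeroes rax; when it is clear, (rax & 1) - 1 is -1 (all
// ones) and rax is preserved: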
2821 // rax &= (rax & 1) - 1 2822 __ movptr(rscratch1, rax); 2823 __ andptr(rscratch1, 0x1); 2824 __ subptr(rscratch1, 0x1); 2825 __ andptr(rax, rscratch1); 2826 } 2827 } 2828 %} 2829 2830 %} 2831 2832 // Operands for bound floating point register arguments 2833 operand rxmm0() %{ 2834 constraint(ALLOC_IN_RC(xmm0_reg)); 2835 match(VecX); 2836 format %{ %} 2837 interface(REG_INTER); 2838 %} 2839 2840 //----------OPERANDS----------------------------------------------------------- 2841 // Operand definitions must precede instruction definitions for correct parsing 2842 // in the ADLC because operands constitute user-defined types which are used in 2843 // instruction definitions. 2844 2845 // Vectors 2846 2847 // Dummy generic vector class. Should be used for all vector operands. 2848 // Replaced with vec[SDXYZ] during post-selection pass. 2849 operand vec() %{ 2850 constraint(ALLOC_IN_RC(dynamic)); 2851 match(VecX); 2852 match(VecY); 2853 match(VecZ); 2854 match(VecS); 2855 match(VecD); 2856 2857 format %{ %} 2858 interface(REG_INTER); 2859 %} 2860 2861 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2862 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2863 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2864 // runtime code generation via reg_class_dynamic. 2865 operand legVec() %{ 2866 constraint(ALLOC_IN_RC(dynamic)); 2867 match(VecX); 2868 match(VecY); 2869 match(VecZ); 2870 match(VecS); 2871 match(VecD); 2872 2873 format %{ %} 2874 interface(REG_INTER); 2875 %} 2876 2877 // Replaces vec during post-selection cleanup. See above. 2878 operand vecS() %{ 2879 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2880 match(VecS); 2881 2882 format %{ %} 2883 interface(REG_INTER); 2884 %} 2885 2886 // Replaces legVec during post-selection cleanup. See above. 2887 operand legVecS() %{ 2888 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2889 match(VecS); 2890 2891 format %{ %} 2892 interface(REG_INTER); 2893 %} 2894 2895 // Replaces vec during post-selection cleanup. See above. 2896 operand vecD() %{ 2897 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2898 match(VecD); 2899 2900 format %{ %} 2901 interface(REG_INTER); 2902 %} 2903 2904 // Replaces legVec during post-selection cleanup. See above. 2905 operand legVecD() %{ 2906 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2907 match(VecD); 2908 2909 format %{ %} 2910 interface(REG_INTER); 2911 %} 2912 2913 // Replaces vec during post-selection cleanup. See above. 2914 operand vecX() %{ 2915 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2916 match(VecX); 2917 2918 format %{ %} 2919 interface(REG_INTER); 2920 %} 2921 2922 // Replaces legVec during post-selection cleanup. See above. 2923 operand legVecX() %{ 2924 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2925 match(VecX); 2926 2927 format %{ %} 2928 interface(REG_INTER); 2929 %} 2930 2931 // Replaces vec during post-selection cleanup. See above. 2932 operand vecY() %{ 2933 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2934 match(VecY); 2935 2936 format %{ %} 2937 interface(REG_INTER); 2938 %} 2939 2940 // Replaces legVec during post-selection cleanup. See above. 2941 operand legVecY() %{ 2942 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2943 match(VecY); 2944 2945 format %{ %} 2946 interface(REG_INTER); 2947 %} 2948 2949 // Replaces vec during post-selection cleanup. See above.
2950 operand vecZ() %{ 2951 constraint(ALLOC_IN_RC(vectorz_reg)); 2952 match(VecZ); 2953 2954 format %{ %} 2955 interface(REG_INTER); 2956 %} 2957 2958 // Replaces legVec during post-selection cleanup. See above. 2959 operand legVecZ() %{ 2960 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2961 match(VecZ); 2962 2963 format %{ %} 2964 interface(REG_INTER); 2965 %} 2966 2967 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2968 2969 // ============================================================================ 2970 2971 instruct ShouldNotReachHere() %{ 2972 match(Halt); 2973 format %{ "stop\t# ShouldNotReachHere" %} 2974 ins_encode %{ 2975 if (is_reachable()) { 2976 __ stop(_halt_reason); 2977 } 2978 %} 2979 ins_pipe(pipe_slow); 2980 %} 2981 2982 // ============================================================================ 2983 2984 instruct addF_reg(regF dst, regF src) %{ 2985 predicate((UseSSE>=1) && (UseAVX == 0)); 2986 match(Set dst (AddF dst src)); 2987 2988 format %{ "addss $dst, $src" %} 2989 ins_cost(150); 2990 ins_encode %{ 2991 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct addF_mem(regF dst, memory src) %{ 2997 predicate((UseSSE>=1) && (UseAVX == 0)); 2998 match(Set dst (AddF dst (LoadF src))); 2999 3000 format %{ "addss $dst, $src" %} 3001 ins_cost(150); 3002 ins_encode %{ 3003 __ addss($dst$$XMMRegister, $src$$Address); 3004 %} 3005 ins_pipe(pipe_slow); 3006 %} 3007 3008 instruct addF_imm(regF dst, immF con) %{ 3009 predicate((UseSSE>=1) && (UseAVX == 0)); 3010 match(Set dst (AddF dst con)); 3011 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3012 ins_cost(150); 3013 ins_encode %{ 3014 __ addss($dst$$XMMRegister, $constantaddress($con)); 3015 %} 3016 ins_pipe(pipe_slow); 3017 %} 3018 3019 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3020 predicate(UseAVX > 0); 3021 match(Set dst (AddF src1 src2)); 3022 3023 format %{ "vaddss $dst, $src1, $src2" %} 3024 ins_cost(150); 3025 ins_encode %{ 3026 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3027 %} 3028 ins_pipe(pipe_slow); 3029 %} 3030 3031 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3032 predicate(UseAVX > 0); 3033 match(Set dst (AddF src1 (LoadF src2))); 3034 3035 format %{ "vaddss $dst, $src1, $src2" %} 3036 ins_cost(150); 3037 ins_encode %{ 3038 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3039 %} 3040 ins_pipe(pipe_slow); 3041 %} 3042 3043 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3044 predicate(UseAVX > 0); 3045 match(Set dst (AddF src con)); 3046 3047 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct addD_reg(regD dst, regD src) %{ 3056 predicate((UseSSE>=2) && (UseAVX == 0)); 3057 match(Set dst (AddD dst src)); 3058 3059 format %{ "addsd $dst, $src" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct addD_mem(regD dst, memory src) %{ 3068 predicate((UseSSE>=2) && (UseAVX == 0)); 3069 match(Set dst (AddD dst (LoadD src))); 3070 3071 format %{ "addsd $dst, $src" %} 3072 ins_cost(150); 3073 ins_encode %{ 3074 __ addsd($dst$$XMMRegister, $src$$Address); 3075 %} 3076 ins_pipe(pipe_slow); 3077 %} 
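// Scalar SSE/AVX floating-point instructions have no immediate operand form,
// so float and double constants are materialized by loading from the constant
// table instead (the $constantaddress references in the *_imm rules).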
3078 3079 instruct addD_imm(regD dst, immD con) %{ 3080 predicate((UseSSE>=2) && (UseAVX == 0)); 3081 match(Set dst (AddD dst con)); 3082 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3083 ins_cost(150); 3084 ins_encode %{ 3085 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3086 %} 3087 ins_pipe(pipe_slow); 3088 %} 3089 3090 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3091 predicate(UseAVX > 0); 3092 match(Set dst (AddD src1 src2)); 3093 3094 format %{ "vaddsd $dst, $src1, $src2" %} 3095 ins_cost(150); 3096 ins_encode %{ 3097 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3098 %} 3099 ins_pipe(pipe_slow); 3100 %} 3101 3102 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3103 predicate(UseAVX > 0); 3104 match(Set dst (AddD src1 (LoadD src2))); 3105 3106 format %{ "vaddsd $dst, $src1, $src2" %} 3107 ins_cost(150); 3108 ins_encode %{ 3109 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3110 %} 3111 ins_pipe(pipe_slow); 3112 %} 3113 3114 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3115 predicate(UseAVX > 0); 3116 match(Set dst (AddD src con)); 3117 3118 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct subF_reg(regF dst, regF src) %{ 3127 predicate((UseSSE>=1) && (UseAVX == 0)); 3128 match(Set dst (SubF dst src)); 3129 3130 format %{ "subss $dst, $src" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct subF_mem(regF dst, memory src) %{ 3139 predicate((UseSSE>=1) && (UseAVX == 0)); 3140 match(Set dst (SubF dst (LoadF src))); 3141 3142 format %{ "subss $dst, $src" %} 3143 ins_cost(150); 3144 ins_encode %{ 3145 __ subss($dst$$XMMRegister, $src$$Address); 3146 %} 3147 ins_pipe(pipe_slow); 3148 %} 3149 3150 instruct subF_imm(regF dst, immF con) %{ 3151 predicate((UseSSE>=1) && (UseAVX == 0)); 3152 match(Set dst (SubF dst con)); 3153 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3154 ins_cost(150); 3155 ins_encode %{ 3156 __ subss($dst$$XMMRegister, $constantaddress($con)); 3157 %} 3158 ins_pipe(pipe_slow); 3159 %} 3160 3161 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3162 predicate(UseAVX > 0); 3163 match(Set dst (SubF src1 src2)); 3164 3165 format %{ "vsubss $dst, $src1, $src2" %} 3166 ins_cost(150); 3167 ins_encode %{ 3168 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3169 %} 3170 ins_pipe(pipe_slow); 3171 %} 3172 3173 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3174 predicate(UseAVX > 0); 3175 match(Set dst (SubF src1 (LoadF src2))); 3176 3177 format %{ "vsubss $dst, $src1, $src2" %} 3178 ins_cost(150); 3179 ins_encode %{ 3180 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3181 %} 3182 ins_pipe(pipe_slow); 3183 %} 3184 3185 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3186 predicate(UseAVX > 0); 3187 match(Set dst (SubF src con)); 3188 3189 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3190 ins_cost(150); 3191 ins_encode %{ 3192 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3193 %} 3194 ins_pipe(pipe_slow); 3195 %} 3196 3197 instruct subD_reg(regD dst, regD src) 
%{ 3198 predicate((UseSSE>=2) && (UseAVX == 0)); 3199 match(Set dst (SubD dst src)); 3200 3201 format %{ "subsd $dst, $src" %} 3202 ins_cost(150); 3203 ins_encode %{ 3204 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct subD_mem(regD dst, memory src) %{ 3210 predicate((UseSSE>=2) && (UseAVX == 0)); 3211 match(Set dst (SubD dst (LoadD src))); 3212 3213 format %{ "subsd $dst, $src" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ subsd($dst$$XMMRegister, $src$$Address); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct subD_imm(regD dst, immD con) %{ 3222 predicate((UseSSE>=2) && (UseAVX == 0)); 3223 match(Set dst (SubD dst con)); 3224 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3225 ins_cost(150); 3226 ins_encode %{ 3227 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3228 %} 3229 ins_pipe(pipe_slow); 3230 %} 3231 3232 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3233 predicate(UseAVX > 0); 3234 match(Set dst (SubD src1 src2)); 3235 3236 format %{ "vsubsd $dst, $src1, $src2" %} 3237 ins_cost(150); 3238 ins_encode %{ 3239 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3240 %} 3241 ins_pipe(pipe_slow); 3242 %} 3243 3244 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3245 predicate(UseAVX > 0); 3246 match(Set dst (SubD src1 (LoadD src2))); 3247 3248 format %{ "vsubsd $dst, $src1, $src2" %} 3249 ins_cost(150); 3250 ins_encode %{ 3251 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3252 %} 3253 ins_pipe(pipe_slow); 3254 %} 3255 3256 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3257 predicate(UseAVX > 0); 3258 match(Set dst (SubD src con)); 3259 3260 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3261 ins_cost(150); 3262 ins_encode %{ 3263 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3264 %} 3265 ins_pipe(pipe_slow); 3266 %} 3267 3268 instruct mulF_reg(regF dst, regF src) %{ 3269 predicate((UseSSE>=1) && (UseAVX == 0)); 3270 match(Set dst (MulF dst src)); 3271 3272 format %{ "mulss $dst, $src" %} 3273 ins_cost(150); 3274 ins_encode %{ 3275 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3276 %} 3277 ins_pipe(pipe_slow); 3278 %} 3279 3280 instruct mulF_mem(regF dst, memory src) %{ 3281 predicate((UseSSE>=1) && (UseAVX == 0)); 3282 match(Set dst (MulF dst (LoadF src))); 3283 3284 format %{ "mulss $dst, $src" %} 3285 ins_cost(150); 3286 ins_encode %{ 3287 __ mulss($dst$$XMMRegister, $src$$Address); 3288 %} 3289 ins_pipe(pipe_slow); 3290 %} 3291 3292 instruct mulF_imm(regF dst, immF con) %{ 3293 predicate((UseSSE>=1) && (UseAVX == 0)); 3294 match(Set dst (MulF dst con)); 3295 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3296 ins_cost(150); 3297 ins_encode %{ 3298 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3299 %} 3300 ins_pipe(pipe_slow); 3301 %} 3302 3303 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3304 predicate(UseAVX > 0); 3305 match(Set dst (MulF src1 src2)); 3306 3307 format %{ "vmulss $dst, $src1, $src2" %} 3308 ins_cost(150); 3309 ins_encode %{ 3310 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3311 %} 3312 ins_pipe(pipe_slow); 3313 %} 3314 3315 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3316 predicate(UseAVX > 0); 3317 match(Set dst (MulF src1 (LoadF src2))); 3318 3319 format %{ "vmulss $dst, $src1, $src2" %} 3320 
ins_cost(150); 3321 ins_encode %{ 3322 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3323 %} 3324 ins_pipe(pipe_slow); 3325 %} 3326 3327 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3328 predicate(UseAVX > 0); 3329 match(Set dst (MulF src con)); 3330 3331 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3332 ins_cost(150); 3333 ins_encode %{ 3334 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3335 %} 3336 ins_pipe(pipe_slow); 3337 %} 3338 3339 instruct mulD_reg(regD dst, regD src) %{ 3340 predicate((UseSSE>=2) && (UseAVX == 0)); 3341 match(Set dst (MulD dst src)); 3342 3343 format %{ "mulsd $dst, $src" %} 3344 ins_cost(150); 3345 ins_encode %{ 3346 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3347 %} 3348 ins_pipe(pipe_slow); 3349 %} 3350 3351 instruct mulD_mem(regD dst, memory src) %{ 3352 predicate((UseSSE>=2) && (UseAVX == 0)); 3353 match(Set dst (MulD dst (LoadD src))); 3354 3355 format %{ "mulsd $dst, $src" %} 3356 ins_cost(150); 3357 ins_encode %{ 3358 __ mulsd($dst$$XMMRegister, $src$$Address); 3359 %} 3360 ins_pipe(pipe_slow); 3361 %} 3362 3363 instruct mulD_imm(regD dst, immD con) %{ 3364 predicate((UseSSE>=2) && (UseAVX == 0)); 3365 match(Set dst (MulD dst con)); 3366 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3367 ins_cost(150); 3368 ins_encode %{ 3369 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3370 %} 3371 ins_pipe(pipe_slow); 3372 %} 3373 3374 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3375 predicate(UseAVX > 0); 3376 match(Set dst (MulD src1 src2)); 3377 3378 format %{ "vmulsd $dst, $src1, $src2" %} 3379 ins_cost(150); 3380 ins_encode %{ 3381 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3382 %} 3383 ins_pipe(pipe_slow); 3384 %} 3385 3386 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3387 predicate(UseAVX > 0); 3388 match(Set dst (MulD src1 (LoadD src2))); 3389 3390 format %{ "vmulsd $dst, $src1, $src2" %} 3391 ins_cost(150); 3392 ins_encode %{ 3393 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3394 %} 3395 ins_pipe(pipe_slow); 3396 %} 3397 3398 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3399 predicate(UseAVX > 0); 3400 match(Set dst (MulD src con)); 3401 3402 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3403 ins_cost(150); 3404 ins_encode %{ 3405 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3406 %} 3407 ins_pipe(pipe_slow); 3408 %} 3409 3410 instruct divF_reg(regF dst, regF src) %{ 3411 predicate((UseSSE>=1) && (UseAVX == 0)); 3412 match(Set dst (DivF dst src)); 3413 3414 format %{ "divss $dst, $src" %} 3415 ins_cost(150); 3416 ins_encode %{ 3417 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3418 %} 3419 ins_pipe(pipe_slow); 3420 %} 3421 3422 instruct divF_mem(regF dst, memory src) %{ 3423 predicate((UseSSE>=1) && (UseAVX == 0)); 3424 match(Set dst (DivF dst (LoadF src))); 3425 3426 format %{ "divss $dst, $src" %} 3427 ins_cost(150); 3428 ins_encode %{ 3429 __ divss($dst$$XMMRegister, $src$$Address); 3430 %} 3431 ins_pipe(pipe_slow); 3432 %} 3433 3434 instruct divF_imm(regF dst, immF con) %{ 3435 predicate((UseSSE>=1) && (UseAVX == 0)); 3436 match(Set dst (DivF dst con)); 3437 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3438 ins_cost(150); 3439 ins_encode %{ 3440 __ divss($dst$$XMMRegister, $constantaddress($con)); 3441 
%} 3442 ins_pipe(pipe_slow); 3443 %} 3444 3445 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3446 predicate(UseAVX > 0); 3447 match(Set dst (DivF src1 src2)); 3448 3449 format %{ "vdivss $dst, $src1, $src2" %} 3450 ins_cost(150); 3451 ins_encode %{ 3452 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3453 %} 3454 ins_pipe(pipe_slow); 3455 %} 3456 3457 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3458 predicate(UseAVX > 0); 3459 match(Set dst (DivF src1 (LoadF src2))); 3460 3461 format %{ "vdivss $dst, $src1, $src2" %} 3462 ins_cost(150); 3463 ins_encode %{ 3464 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3465 %} 3466 ins_pipe(pipe_slow); 3467 %} 3468 3469 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3470 predicate(UseAVX > 0); 3471 match(Set dst (DivF src con)); 3472 3473 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3474 ins_cost(150); 3475 ins_encode %{ 3476 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3477 %} 3478 ins_pipe(pipe_slow); 3479 %} 3480 3481 instruct divD_reg(regD dst, regD src) %{ 3482 predicate((UseSSE>=2) && (UseAVX == 0)); 3483 match(Set dst (DivD dst src)); 3484 3485 format %{ "divsd $dst, $src" %} 3486 ins_cost(150); 3487 ins_encode %{ 3488 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3489 %} 3490 ins_pipe(pipe_slow); 3491 %} 3492 3493 instruct divD_mem(regD dst, memory src) %{ 3494 predicate((UseSSE>=2) && (UseAVX == 0)); 3495 match(Set dst (DivD dst (LoadD src))); 3496 3497 format %{ "divsd $dst, $src" %} 3498 ins_cost(150); 3499 ins_encode %{ 3500 __ divsd($dst$$XMMRegister, $src$$Address); 3501 %} 3502 ins_pipe(pipe_slow); 3503 %} 3504 3505 instruct divD_imm(regD dst, immD con) %{ 3506 predicate((UseSSE>=2) && (UseAVX == 0)); 3507 match(Set dst (DivD dst con)); 3508 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3509 ins_cost(150); 3510 ins_encode %{ 3511 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3512 %} 3513 ins_pipe(pipe_slow); 3514 %} 3515 3516 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3517 predicate(UseAVX > 0); 3518 match(Set dst (DivD src1 src2)); 3519 3520 format %{ "vdivsd $dst, $src1, $src2" %} 3521 ins_cost(150); 3522 ins_encode %{ 3523 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3524 %} 3525 ins_pipe(pipe_slow); 3526 %} 3527 3528 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3529 predicate(UseAVX > 0); 3530 match(Set dst (DivD src1 (LoadD src2))); 3531 3532 format %{ "vdivsd $dst, $src1, $src2" %} 3533 ins_cost(150); 3534 ins_encode %{ 3535 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3536 %} 3537 ins_pipe(pipe_slow); 3538 %} 3539 3540 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3541 predicate(UseAVX > 0); 3542 match(Set dst (DivD src con)); 3543 3544 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3545 ins_cost(150); 3546 ins_encode %{ 3547 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3548 %} 3549 ins_pipe(pipe_slow); 3550 %} 3551 3552 instruct absF_reg(regF dst) %{ 3553 predicate((UseSSE>=1) && (UseAVX == 0)); 3554 match(Set dst (AbsF dst)); 3555 ins_cost(150); 3556 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3557 ins_encode %{ 3558 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3559 %} 3560 ins_pipe(pipe_slow); 3561 %} 3562 3563 instruct 
absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance, so only the rule where the input is pre-loaded into
// the dst register is defined below.
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src\t! using $tmp as TEMP" %}
  ins_encode %{
    __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src\t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ flt16_to_flt($dst$$XMMRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- VectorReinterpret ------------------------------------
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same vector size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same vector size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst and src cover the same vector size in bytes
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret(vec dst) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_expand(vec dst, vec src) %{
  predicate(UseAVX == 0 &&
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  effect(TEMP dst);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this) <= 16, "required");
    assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required");

    int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
    if (src_vlen_in_bytes == 4) {
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg);
    } else {
      assert(src_vlen_in_bytes == 8, "");
      __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg);
    }
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand4(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src" %}
  ins_encode %{
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreinterpret_expand(legVec dst, vec src) %{
  predicate(UseAVX > 0 &&
            !n->bottom_type()->isa_vectmask() &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
            (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_expand $dst,$src\t!" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
      case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
      case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_shrink(vec dst, legVec src) %{
  predicate(!n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
  match(Set dst (VectorReinterpret src));
  ins_cost(125);
  format %{ "vector_reinterpret_shrink $dst,$src\t!"
%} 3898 ins_encode %{ 3899 switch (Matcher::vector_length_in_bytes(this)) { 3900 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3901 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3902 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3903 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3904 default: ShouldNotReachHere(); 3905 } 3906 %} 3907 ins_pipe( pipe_slow ); 3908 %} 3909 3910 // ---------------------------------------------------------------------------------------------------- 3911 3912 #ifdef _LP64 3913 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3914 match(Set dst (RoundDoubleMode src rmode)); 3915 format %{ "roundsd $dst,$src" %} 3916 ins_cost(150); 3917 ins_encode %{ 3918 assert(UseSSE >= 4, "required"); 3919 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3920 %} 3921 ins_pipe(pipe_slow); 3922 %} 3923 3924 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3925 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3926 format %{ "roundsd $dst,$src" %} 3927 ins_cost(150); 3928 ins_encode %{ 3929 assert(UseSSE >= 4, "required"); 3930 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3931 %} 3932 ins_pipe(pipe_slow); 3933 %} 3934 3935 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3936 match(Set dst (RoundDoubleMode con rmode)); 3937 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3938 ins_cost(150); 3939 ins_encode %{ 3940 assert(UseSSE >= 4, "required"); 3941 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3942 %} 3943 ins_pipe(pipe_slow); 3944 %} 3945 3946 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3947 predicate(Matcher::vector_length(n) < 8); 3948 match(Set dst (RoundDoubleModeV src rmode)); 3949 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3950 ins_encode %{ 3951 assert(UseAVX > 0, "required"); 3952 int vlen_enc = vector_length_encoding(this); 3953 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3954 %} 3955 ins_pipe( pipe_slow ); 3956 %} 3957 3958 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3959 predicate(Matcher::vector_length(n) == 8); 3960 match(Set dst (RoundDoubleModeV src rmode)); 3961 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3962 ins_encode %{ 3963 assert(UseAVX > 2, "required"); 3964 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3965 %} 3966 ins_pipe( pipe_slow ); 3967 %} 3968 3969 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3970 predicate(Matcher::vector_length(n) < 8); 3971 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3972 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3973 ins_encode %{ 3974 assert(UseAVX > 0, "required"); 3975 int vlen_enc = vector_length_encoding(this); 3976 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3977 %} 3978 ins_pipe( pipe_slow ); 3979 %} 3980 3981 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3982 predicate(Matcher::vector_length(n) == 8); 3983 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3984 format %{ "vrndscalepd $dst,$mem,$rmode\t! 
round packed8D" %} 3985 ins_encode %{ 3986 assert(UseAVX > 2, "required"); 3987 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3988 %} 3989 ins_pipe( pipe_slow ); 3990 %} 3991 #endif // _LP64 3992 3993 instruct onspinwait() %{ 3994 match(OnSpinWait); 3995 ins_cost(200); 3996 3997 format %{ 3998 $$template 3999 $$emit$$"pause\t! membar_onspinwait" 4000 %} 4001 ins_encode %{ 4002 __ pause(); 4003 %} 4004 ins_pipe(pipe_slow); 4005 %} 4006 4007 // a * b + c 4008 instruct fmaD_reg(regD a, regD b, regD c) %{ 4009 match(Set c (FmaD c (Binary a b))); 4010 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4011 ins_cost(150); 4012 ins_encode %{ 4013 assert(UseFMA, "Needs FMA instructions support."); 4014 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 4019 // a * b + c 4020 instruct fmaF_reg(regF a, regF b, regF c) %{ 4021 match(Set c (FmaF c (Binary a b))); 4022 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4023 ins_cost(150); 4024 ins_encode %{ 4025 assert(UseFMA, "Needs FMA instructions support."); 4026 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4027 %} 4028 ins_pipe( pipe_slow ); 4029 %} 4030 4031 // ====================VECTOR INSTRUCTIONS===================================== 4032 4033 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4034 instruct MoveVec2Leg(legVec dst, vec src) %{ 4035 match(Set dst src); 4036 format %{ "" %} 4037 ins_encode %{ 4038 ShouldNotReachHere(); 4039 %} 4040 ins_pipe( fpu_reg_reg ); 4041 %} 4042 4043 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4044 match(Set dst src); 4045 format %{ "" %} 4046 ins_encode %{ 4047 ShouldNotReachHere(); 4048 %} 4049 ins_pipe( fpu_reg_reg ); 4050 %} 4051 4052 // ============================================================================ 4053 4054 // Load vectors generic operand pattern 4055 instruct loadV(vec dst, memory mem) %{ 4056 match(Set dst (LoadVector mem)); 4057 ins_cost(125); 4058 format %{ "load_vector $dst,$mem" %} 4059 ins_encode %{ 4060 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4061 %} 4062 ins_pipe( pipe_slow ); 4063 %} 4064 4065 // Store vectors generic operand pattern. 4066 instruct storeV(memory mem, vec src) %{ 4067 match(Set mem (StoreVector mem src)); 4068 ins_cost(145); 4069 format %{ "store_vector $mem,$src\n\t" %} 4070 ins_encode %{ 4071 switch (Matcher::vector_length_in_bytes(this, $src)) { 4072 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4073 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4074 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4075 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4076 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4077 default: ShouldNotReachHere(); 4078 } 4079 %} 4080 ins_pipe( pipe_slow ); 4081 %} 4082 4083 // ---------------------------------------- Gather ------------------------------------ 4084 4085 // Gather INT, LONG, FLOAT, DOUBLE 4086 4087 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4088 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 4089 match(Set dst (LoadVectorGather mem idx)); 4090 effect(TEMP dst, TEMP tmp, TEMP mask); 4091 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    if (vlen_enc == Assembler::AVX_128bit) {
      __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
    } else {
      __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
    }
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateS=======================================

instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
ins_encode %{ 4241 uint vlen = Matcher::vector_length(this); 4242 int vlen_enc = vector_length_encoding(this); 4243 if (UseAVX >= 2) { 4244 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4245 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4246 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4247 } else { 4248 __ movdl($dst$$XMMRegister, $src$$Register); 4249 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4250 } 4251 } else { 4252 assert(UseAVX < 2, ""); 4253 __ movdl($dst$$XMMRegister, $src$$Register); 4254 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4255 if (vlen >= 8) { 4256 assert(vlen == 8, ""); 4257 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4258 } 4259 } 4260 %} 4261 ins_pipe( pipe_slow ); 4262 %} 4263 4264 instruct ReplS_mem(vec dst, memory mem) %{ 4265 predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT); 4266 match(Set dst (Replicate (LoadS mem))); 4267 format %{ "replicateS $dst,$mem" %} 4268 ins_encode %{ 4269 int vlen_enc = vector_length_encoding(this); 4270 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4271 %} 4272 ins_pipe( pipe_slow ); 4273 %} 4274 4275 // ====================ReplicateI======================================= 4276 4277 instruct ReplI_reg(vec dst, rRegI src) %{ 4278 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4279 match(Set dst (Replicate src)); 4280 format %{ "replicateI $dst,$src" %} 4281 ins_encode %{ 4282 uint vlen = Matcher::vector_length(this); 4283 int vlen_enc = vector_length_encoding(this); 4284 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4285 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4286 } else if (VM_Version::supports_avx2()) { 4287 __ movdl($dst$$XMMRegister, $src$$Register); 4288 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4289 } else { 4290 __ movdl($dst$$XMMRegister, $src$$Register); 4291 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4292 } 4293 %} 4294 ins_pipe( pipe_slow ); 4295 %} 4296 4297 instruct ReplI_mem(vec dst, memory mem) %{ 4298 predicate(Matcher::vector_element_basic_type(n) == T_INT); 4299 match(Set dst (Replicate (LoadI mem))); 4300 format %{ "replicateI $dst,$mem" %} 4301 ins_encode %{ 4302 int vlen_enc = vector_length_encoding(this); 4303 if (VM_Version::supports_avx2()) { 4304 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4305 } else if (VM_Version::supports_avx()) { 4306 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4307 } else { 4308 __ movdl($dst$$XMMRegister, $mem$$Address); 4309 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4310 } 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 instruct ReplI_imm(vec dst, immI con) %{ 4316 predicate(Matcher::is_non_long_integral_vector(n)); 4317 match(Set dst (Replicate con)); 4318 format %{ "replicateI $dst,$con" %} 4319 ins_encode %{ 4320 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4321 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4322 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4323 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4324 BasicType bt = Matcher::vector_element_basic_type(this); 4325 int vlen = Matcher::vector_length_in_bytes(this); 4326 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4327 %} 4328 ins_pipe( pipe_slow ); 4329 %} 4330 4331 // Replicate scalar zero to be vector 4332 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4333 predicate(Matcher::is_non_long_integral_vector(n)); 4334 match(Set dst (Replicate zero)); 4335 format %{ "replicateI $dst,$zero" %} 4336 ins_encode %{ 4337 int vlen_enc = vector_length_encoding(this); 4338 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4339 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4340 } else { 4341 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4342 } 4343 %} 4344 ins_pipe( fpu_reg_reg ); 4345 %} 4346 4347 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4348 predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n)); 4349 match(Set dst (Replicate con)); 4350 format %{ "vallones $dst" %} 4351 ins_encode %{ 4352 int vector_len = vector_length_encoding(this); 4353 __ vallones($dst$$XMMRegister, vector_len); 4354 %} 4355 ins_pipe( pipe_slow ); 4356 %} 4357 4358 // ====================ReplicateL======================================= 4359 4360 #ifdef _LP64 4361 // Replicate long (8 byte) scalar to be vector 4362 instruct ReplL_reg(vec dst, rRegL src) %{ 4363 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4364 match(Set dst (Replicate src)); 4365 format %{ "replicateL $dst,$src" %} 4366 ins_encode %{ 4367 int vlen = Matcher::vector_length(this); 4368 int vlen_enc = vector_length_encoding(this); 4369 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4370 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4371 } else if (VM_Version::supports_avx2()) { 4372 __ movdq($dst$$XMMRegister, $src$$Register); 4373 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4374 } else { 4375 __ movdq($dst$$XMMRegister, $src$$Register); 4376 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4377 } 4378 %} 4379 ins_pipe( pipe_slow ); 4380 %} 4381 #else // _LP64 4382 // Replicate long (8 byte) scalar to be vector 4383 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4384 predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG); 4385 match(Set dst (Replicate src)); 4386 effect(TEMP dst, USE src, TEMP tmp); 4387 format %{ "replicateL $dst,$src" %} 4388 ins_encode %{ 4389 uint vlen = Matcher::vector_length(this); 4390 if (vlen == 2) { 4391 __ movdl($dst$$XMMRegister, $src$$Register); 4392 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4393 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4394 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4395 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4396 int vlen_enc = Assembler::AVX_256bit; 4397 __ movdl($dst$$XMMRegister, $src$$Register); 4398 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4399 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4400 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4401 } else { 4402 __ movdl($dst$$XMMRegister, $src$$Register); 4403 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4404 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4405 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4406 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4407 
} 4408 %} 4409 ins_pipe( pipe_slow ); 4410 %} 4411 4412 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4413 predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG); 4414 match(Set dst (Replicate src)); 4415 effect(TEMP dst, USE src, TEMP tmp); 4416 format %{ "replicateL $dst,$src" %} 4417 ins_encode %{ 4418 if (VM_Version::supports_avx512vl()) { 4419 __ movdl($dst$$XMMRegister, $src$$Register); 4420 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4421 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4422 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4423 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4424 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4425 } else { 4426 int vlen_enc = Assembler::AVX_512bit; 4427 __ movdl($dst$$XMMRegister, $src$$Register); 4428 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4429 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4430 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4431 } 4432 %} 4433 ins_pipe( pipe_slow ); 4434 %} 4435 #endif // _LP64 4436 4437 instruct ReplL_mem(vec dst, memory mem) %{ 4438 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4439 match(Set dst (Replicate (LoadL mem))); 4440 format %{ "replicateL $dst,$mem" %} 4441 ins_encode %{ 4442 int vlen_enc = vector_length_encoding(this); 4443 if (VM_Version::supports_avx2()) { 4444 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4445 } else if (VM_Version::supports_sse3()) { 4446 __ movddup($dst$$XMMRegister, $mem$$Address); 4447 } else { 4448 __ movq($dst$$XMMRegister, $mem$$Address); 4449 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4450 } 4451 %} 4452 ins_pipe( pipe_slow ); 4453 %} 4454 4455 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 
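// The broadcast pattern is built at compile time: vreplicate_imm packs the
// immediate into a constant-table entry, and load_constant_vector then loads
// that entry and broadcasts it as needed to fill the requested vector width.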
4456 instruct ReplL_imm(vec dst, immL con) %{ 4457 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4458 match(Set dst (Replicate con)); 4459 format %{ "replicateL $dst,$con" %} 4460 ins_encode %{ 4461 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4462 int vlen = Matcher::vector_length_in_bytes(this); 4463 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4464 %} 4465 ins_pipe( pipe_slow ); 4466 %} 4467 4468 instruct ReplL_zero(vec dst, immL0 zero) %{ 4469 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 4470 match(Set dst (Replicate zero)); 4471 format %{ "replicateL $dst,$zero" %} 4472 ins_encode %{ 4473 int vlen_enc = vector_length_encoding(this); 4474 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4475 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4476 } else { 4477 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4478 } 4479 %} 4480 ins_pipe( fpu_reg_reg ); 4481 %} 4482 4483 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4484 predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG); 4485 match(Set dst (Replicate con)); 4486 format %{ "vallones $dst" %} 4487 ins_encode %{ 4488 int vector_len = vector_length_encoding(this); 4489 __ vallones($dst$$XMMRegister, vector_len); 4490 %} 4491 ins_pipe( pipe_slow ); 4492 %} 4493 4494 // ====================ReplicateF======================================= 4495 4496 instruct vReplF_reg(vec dst, vlRegF src) %{ 4497 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4498 match(Set dst (Replicate src)); 4499 format %{ "replicateF $dst,$src" %} 4500 ins_encode %{ 4501 uint vlen = Matcher::vector_length(this); 4502 int vlen_enc = vector_length_encoding(this); 4503 if (vlen <= 4) { 4504 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4505 } else if (VM_Version::supports_avx2()) { 4506 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4507 } else { 4508 assert(vlen == 8, "sanity"); 4509 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4510 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4511 } 4512 %} 4513 ins_pipe( pipe_slow ); 4514 %} 4515 4516 instruct ReplF_reg(vec dst, vlRegF src) %{ 4517 predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4518 match(Set dst (Replicate src)); 4519 format %{ "replicateF $dst,$src" %} 4520 ins_encode %{ 4521 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4522 %} 4523 ins_pipe( pipe_slow ); 4524 %} 4525 4526 instruct ReplF_mem(vec dst, memory mem) %{ 4527 predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT); 4528 match(Set dst (Replicate (LoadF mem))); 4529 format %{ "replicateF $dst,$mem" %} 4530 ins_encode %{ 4531 int vlen_enc = vector_length_encoding(this); 4532 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4533 %} 4534 ins_pipe( pipe_slow ); 4535 %} 4536 4537 // Replicate float scalar immediate to be vector by loading from const table. 4538 instruct ReplF_imm(vec dst, immF con) %{ 4539 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4540 match(Set dst (Replicate con)); 4541 format %{ "replicateF $dst,$con" %} 4542 ins_encode %{ 4543 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4544 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
1 : 2) : 2)); 4545 int vlen = Matcher::vector_length_in_bytes(this); 4546 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4547 %} 4548 ins_pipe( pipe_slow ); 4549 %} 4550 4551 instruct ReplF_zero(vec dst, immF0 zero) %{ 4552 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 4553 match(Set dst (Replicate zero)); 4554 format %{ "replicateF $dst,$zero" %} 4555 ins_encode %{ 4556 int vlen_enc = vector_length_encoding(this); 4557 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4558 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4559 } else { 4560 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4561 } 4562 %} 4563 ins_pipe( fpu_reg_reg ); 4564 %} 4565 4566 // ====================ReplicateD======================================= 4567 4568 // Replicate double (8 bytes) scalar to be vector 4569 instruct vReplD_reg(vec dst, vlRegD src) %{ 4570 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4571 match(Set dst (Replicate src)); 4572 format %{ "replicateD $dst,$src" %} 4573 ins_encode %{ 4574 uint vlen = Matcher::vector_length(this); 4575 int vlen_enc = vector_length_encoding(this); 4576 if (vlen <= 2) { 4577 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4578 } else if (VM_Version::supports_avx2()) { 4579 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4580 } else { 4581 assert(vlen == 4, "sanity"); 4582 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4583 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4584 } 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 instruct ReplD_reg(vec dst, vlRegD src) %{ 4590 predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4591 match(Set dst (Replicate src)); 4592 format %{ "replicateD $dst,$src" %} 4593 ins_encode %{ 4594 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4595 %} 4596 ins_pipe( pipe_slow ); 4597 %} 4598 4599 instruct ReplD_mem(vec dst, memory mem) %{ 4600 predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE); 4601 match(Set dst (Replicate (LoadD mem))); 4602 format %{ "replicateD $dst,$mem" %} 4603 ins_encode %{ 4604 if (Matcher::vector_length(this) >= 4) { 4605 int vlen_enc = vector_length_encoding(this); 4606 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4607 } else { 4608 __ movddup($dst$$XMMRegister, $mem$$Address); 4609 } 4610 %} 4611 ins_pipe( pipe_slow ); 4612 %} 4613 4614 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
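// As with ReplL_imm above, a single 8-byte copy of the constant is stored in
// the table and load_constant_vector broadcasts it to the vector width.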

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
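
// The 256-bit and 512-bit inserts above and below work one 128-bit lane at
// a time.  With elem_per_lane = 16 / sizeof(elem), the element index splits
// into
//   x_idx = idx & right_n_bits(log2epr)   // position within the lane
//   y_idx = idx >> log2epr                // which lane
// e.g. for T_INT in a 256-bit vector (elem_per_lane = 4), idx = 6 gives
// y_idx = 1, x_idx = 2: extract lane 1, insert into its element 2, and
// write the lane back.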

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}
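
// insertps takes the destination element selector in bits 5:4 of its
// immediate (bits 7:6 pick the source element, bits 3:0 are a zero mask,
// both left as 0 here), so the lane-local float index is encoded as
// x_idx << 4 above and in vinsertF below.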

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
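
// No pinsr* form accepts an XMM source, so the double inserts bounce the
// value's bit pattern through a general-purpose register: movq XMM->GPR,
// then pinsrq GPR->lane, as in insert2D above and the 256/512-bit variants
// below.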

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
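
// A sketch of the lowering done by the reduce* routines (see
// c2_MacroAssembler): the vector is repeatedly folded in half, combining
// the upper half with the lower half element-wise through the temps, until
// a single element remains, which is then combined with the incoming
// scalar accumulator src1 to produce dst.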
as TEMP" %} 4936 ins_encode %{ 4937 int opcode = this->ideal_Opcode(); 4938 int vlen = Matcher::vector_length(this, $src); 4939 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4940 %} 4941 ins_pipe( pipe_slow ); 4942 %} 4943 4944 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4945 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4946 match(Set dst (AddReductionVF dst src)); 4947 match(Set dst (MulReductionVF dst src)); 4948 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4949 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4950 ins_encode %{ 4951 int opcode = this->ideal_Opcode(); 4952 int vlen = Matcher::vector_length(this, $src); 4953 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4954 %} 4955 ins_pipe( pipe_slow ); 4956 %} 4957 4958 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4959 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4960 match(Set dst (AddReductionVF dst src)); 4961 match(Set dst (MulReductionVF dst src)); 4962 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4963 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4964 ins_encode %{ 4965 int opcode = this->ideal_Opcode(); 4966 int vlen = Matcher::vector_length(this, $src); 4967 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4968 %} 4969 ins_pipe( pipe_slow ); 4970 %} 4971 4972 // =======================Double Reduction========================================== 4973 4974 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4975 predicate(Matcher::vector_length(n->in(2)) == 2); // src 4976 match(Set dst (AddReductionVD dst src)); 4977 match(Set dst (MulReductionVD dst src)); 4978 effect(TEMP dst, TEMP vtmp); 4979 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4980 ins_encode %{ 4981 int opcode = this->ideal_Opcode(); 4982 int vlen = Matcher::vector_length(this, $src); 4983 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4984 %} 4985 ins_pipe( pipe_slow ); 4986 %} 4987 4988 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4989 predicate(Matcher::vector_length(n->in(2)) == 4); // src 4990 match(Set dst (AddReductionVD dst src)); 4991 match(Set dst (MulReductionVD dst src)); 4992 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4993 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4994 ins_encode %{ 4995 int opcode = this->ideal_Opcode(); 4996 int vlen = Matcher::vector_length(this, $src); 4997 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4998 %} 4999 ins_pipe( pipe_slow ); 5000 %} 5001 5002 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5003 predicate(Matcher::vector_length(n->in(2)) == 8); // src 5004 match(Set dst (AddReductionVD dst src)); 5005 match(Set dst (MulReductionVD dst src)); 5006 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5007 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5008 ins_encode %{ 5009 int opcode = this->ideal_Opcode(); 5010 int vlen = Matcher::vector_length(this, $src); 5011 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5012 %} 5013 ins_pipe( pipe_slow ); 5014 %} 5015 5016 // =======================Byte Reduction========================================== 

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif
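
// Byte reductions are split on AVX512BW because 512-bit byte/word
// operations require it: without AVX512BW the rule is limited to
// legacy-encodable registers (legVec, XMM0-15), while the BW variant may
// use the full EVEX register file.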

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Mul Reduction==========================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
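
// The two rules above only fire when the scalar seed src1 is the identity
// for the operation (+Inf for min, -Inf for max, enforced by the
// predicate), which is why the encoding can ignore src1 entirely.  The
// _av variants below handle a live accumulator instead: dst carries both
// the incoming scalar and the result, signalled by the boolean flag
// passed to reduceFloatMinMax.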
"vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5159 ins_encode %{ 5160 assert(UseAVX > 0, "sanity"); 5161 5162 int opcode = this->ideal_Opcode(); 5163 int vlen = Matcher::vector_length(this, $src); 5164 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5165 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5166 %} 5167 ins_pipe( pipe_slow ); 5168 %} 5169 5170 5171 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5172 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5173 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5174 Matcher::vector_length(n->in(2)) >= 4); 5175 match(Set dst (MinReductionV dst src)); 5176 match(Set dst (MaxReductionV dst src)); 5177 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5178 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5179 ins_encode %{ 5180 assert(UseAVX > 0, "sanity"); 5181 5182 int opcode = this->ideal_Opcode(); 5183 int vlen = Matcher::vector_length(this, $src); 5184 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5185 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5186 %} 5187 ins_pipe( pipe_slow ); 5188 %} 5189 5190 5191 //--------------------Min Double Reduction -------------------- 5192 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5193 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5194 rFlagsReg cr) %{ 5195 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5196 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5197 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5198 Matcher::vector_length(n->in(2)) == 2); 5199 match(Set dst (MinReductionV src1 src2)); 5200 match(Set dst (MaxReductionV src1 src2)); 5201 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5202 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5203 ins_encode %{ 5204 assert(UseAVX > 0, "sanity"); 5205 5206 int opcode = this->ideal_Opcode(); 5207 int vlen = Matcher::vector_length(this, $src2); 5208 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5209 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5210 %} 5211 ins_pipe( pipe_slow ); 5212 %} 5213 5214 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5215 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5216 rFlagsReg cr) %{ 5217 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5218 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5219 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5220 Matcher::vector_length(n->in(2)) >= 4); 5221 match(Set dst (MinReductionV src1 src2)); 5222 match(Set dst (MaxReductionV src1 src2)); 5223 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5224 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5225 ins_encode %{ 5226 assert(UseAVX > 0, "sanity"); 5227 5228 int opcode = this->ideal_Opcode(); 5229 int vlen = Matcher::vector_length(this, $src2); 5230 __ reduceDoubleMinMax(opcode, vlen, false, 

instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR ARITHMETIC=======================================

// --------------------------------- ADD --------------------------------------

// Bytes vector add
instruct vaddB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVB dst src));
  format %{ "paddb $dst,$src\t! add packedB" %}
  ins_encode %{
    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVB src1 src2));
  format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVB src (LoadVector mem)));
  format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
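
// The _mem forms here and below fold the vector load straight into the
// arithmetic instruction.  They are restricted to vectors larger than
// 8 bytes, presumably because the memory operand of a 128-bit instruction
// always reads a full 16 bytes, which could over-read past a 64-bit
// vector.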

// Shorts/Chars vector add
instruct vaddS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVS dst src));
  format %{ "paddw $dst,$src\t! add packedS" %}
  ins_encode %{
    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVS src1 src2));
  format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVS src (LoadVector mem)));
  format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector add
instruct vaddI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVI dst src));
  format %{ "paddd $dst,$src\t! add packedI" %}
  ins_encode %{
    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVI src1 src2));
  format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVI src (LoadVector mem)));
  format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector add
instruct vaddL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVL dst src));
  format %{ "paddq $dst,$src\t! add packedL" %}
  ins_encode %{
    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVL src1 src2));
  format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVL src (LoadVector mem)));
  format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector add
instruct vaddF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVF dst src));
  format %{ "addps $dst,$src\t! add packedF" %}
  ins_encode %{
    __ addps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVF src1 src2));
  format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVF src (LoadVector mem)));
  format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector add
instruct vaddD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AddVD dst src));
  format %{ "addpd $dst,$src\t! add packedD" %}
  ins_encode %{
    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AddVD src1 src2));
  format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vaddD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AddVD src (LoadVector mem)));
  format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- SUB --------------------------------------

// Bytes vector sub
instruct vsubB(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVB dst src));
  format %{ "psubb $dst,$src\t! sub packedB" %}
  ins_encode %{
    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVB src1 src2));
  format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubB_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVB src (LoadVector mem)));
  format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector sub
instruct vsubS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVS dst src));
  format %{ "psubw $dst,$src\t! sub packedS" %}
  ins_encode %{
    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVS src1 src2));
  format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVS src (LoadVector mem)));
  format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector sub
instruct vsubI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVI dst src));
  format %{ "psubd $dst,$src\t! sub packedI" %}
  ins_encode %{
    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVI src1 src2));
  format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVI src (LoadVector mem)));
  format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector sub
instruct vsubL(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVL dst src));
  format %{ "psubq $dst,$src\t! sub packedL" %}
  ins_encode %{
    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVL src1 src2));
  format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubL_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVL src (LoadVector mem)));
  format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector sub
instruct vsubF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVF dst src));
  format %{ "subps $dst,$src\t! sub packedF" %}
  ins_encode %{
    __ subps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVF src1 src2));
  format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVF src (LoadVector mem)));
  format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sub
instruct vsubD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (SubVD dst src));
  format %{ "subpd $dst,$src\t! sub packedD" %}
  ins_encode %{
    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (SubVD src1 src2));
  format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsubD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (SubVD src (LoadVector mem)));
  format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- MUL --------------------------------------

// Byte vector mul
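// x86 has no packed byte multiply, so bytes are multiplied in 16-bit
// lanes.  The 64-bit case below widens both operands with pmovsxbw,
// multiplies them as words and re-packs the low bytes; the wider cases
// multiply the odd-indexed and even-indexed bytes separately in word
// lanes, shift each product back into byte position, and OR the two
// halves together.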

instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
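
// Without AVX512DQ's evpmullq, a 64-bit product is assembled from 32-bit
// partial products; the a_hi*b_hi term shifts entirely out of the low 64
// bits, so
//   a*b (mod 2^64) == a_lo*b_lo + ((a_lo*b_hi + a_hi*b_lo) << 32)
// In the rules below, pshufd swaps the dword halves so pmulld yields both
// cross products, a shuffled add folds them together, psllq(32) moves the
// sum into the high half, and pmuludq supplies the full 64-bit a_lo*b_lo
// term.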

instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only their lower 32 bits are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only their lower 32 bits are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ MinMax ---------------------------------------

// Byte, Short, Int vector Min/Max
instruct minmax_reg_sse(vec dst, vec src) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX == 0);
  match(Set dst (MinV dst src));
  match(Set dst (MaxV dst src));
  format %{ "vector_minmax $dst,$src\t! " %}
  ins_encode %{
    assert(UseSSE >= 4, "required");

    int opcode = this->ideal_Opcode();
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminmax_reg(vec dst, vec src1, vec src2) %{
  predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT
            UseAVX > 0);
  match(Set dst (MinV src1 src2));
  match(Set dst (MaxV src1 src2));
  format %{ "vector_minmax $dst,$src1,$src2\t! " %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long vector Min/Max
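// Packed 64-bit min/max only exists as an instruction on AVX512
// (vpminsq/vpmaxsq), so the SSE and AVX rules below synthesize it with a
// compare and blend.  The fixed rxmm0 temp in the SSE rule is presumably
// there because the SSE4.1 variable blends (pblendvb/blendvpd) take their
// mask implicitly in xmm0.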
" %} 6068 ins_encode %{ 6069 int opcode = this->ideal_Opcode(); 6070 int vlen_enc = vector_length_encoding(this); 6071 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6072 6073 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6074 %} 6075 ins_pipe( pipe_slow ); 6076 %} 6077 6078 // Long vector Min/Max 6079 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6080 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6081 UseAVX == 0); 6082 match(Set dst (MinV dst src)); 6083 match(Set dst (MaxV src dst)); 6084 effect(TEMP dst, TEMP tmp); 6085 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6086 ins_encode %{ 6087 assert(UseSSE >= 4, "required"); 6088 6089 int opcode = this->ideal_Opcode(); 6090 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6091 assert(elem_bt == T_LONG, "sanity"); 6092 6093 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6094 %} 6095 ins_pipe( pipe_slow ); 6096 %} 6097 6098 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6099 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6100 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6101 match(Set dst (MinV src1 src2)); 6102 match(Set dst (MaxV src1 src2)); 6103 effect(TEMP dst); 6104 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6105 ins_encode %{ 6106 int vlen_enc = vector_length_encoding(this); 6107 int opcode = this->ideal_Opcode(); 6108 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6109 assert(elem_bt == T_LONG, "sanity"); 6110 6111 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6112 %} 6113 ins_pipe( pipe_slow ); 6114 %} 6115 6116 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6117 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6118 Matcher::vector_element_basic_type(n) == T_LONG); 6119 match(Set dst (MinV src1 src2)); 6120 match(Set dst (MaxV src1 src2)); 6121 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6122 ins_encode %{ 6123 assert(UseAVX > 2, "required"); 6124 6125 int vlen_enc = vector_length_encoding(this); 6126 int opcode = this->ideal_Opcode(); 6127 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6128 assert(elem_bt == T_LONG, "sanity"); 6129 6130 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6131 %} 6132 ins_pipe( pipe_slow ); 6133 %} 6134 6135 // Float/Double vector Min/Max 6136 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6137 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6138 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6139 UseAVX > 0); 6140 match(Set dst (MinV a b)); 6141 match(Set dst (MaxV a b)); 6142 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6143 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6144 ins_encode %{ 6145 assert(UseAVX > 0, "required"); 6146 6147 int opcode = this->ideal_Opcode(); 6148 int vlen_enc = vector_length_encoding(this); 6149 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6150 6151 __ vminmax_fp(opcode, elem_bt, 6152 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6153 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6154 %} 6155 ins_pipe( pipe_slow ); 6156 %} 6157 6158 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6159 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6160 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6161 match(Set dst (MinV a b)); 6162 match(Set dst (MaxV a b)); 6163 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6164 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6165 ins_encode %{ 6166 assert(UseAVX > 2, "required"); 6167 6168 int opcode = this->ideal_Opcode(); 6169 int vlen_enc = vector_length_encoding(this); 6170 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6171 6172 __ evminmax_fp(opcode, elem_bt, 6173 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6174 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6175 %} 6176 ins_pipe( pipe_slow ); 6177 %} 6178 6179 // --------------------------------- Signum/CopySign --------------------------- 6180 6181 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6182 match(Set dst (SignumF dst (Binary zero one))); 6183 effect(KILL cr); 6184 format %{ "signumF $dst, $dst" %} 6185 ins_encode %{ 6186 int opcode = this->ideal_Opcode(); 6187 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6188 %} 6189 ins_pipe( pipe_slow ); 6190 %} 6191 6192 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6193 match(Set dst (SignumD dst (Binary zero one))); 6194 effect(KILL cr); 6195 format %{ "signumD $dst, $dst" %} 6196 ins_encode %{ 6197 int opcode = this->ideal_Opcode(); 6198 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6199 %} 6200 ins_pipe( pipe_slow ); 6201 %} 6202 6203 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6204 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6205 match(Set dst (SignumVF src (Binary zero one))); 6206 match(Set dst (SignumVD src (Binary zero one))); 6207 effect(TEMP dst, TEMP xtmp1); 6208 format %{ "vector_signum_avx $dst, $src\t! 

// --------------------------------- Signum/CopySign ---------------------------

instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{
  match(Set dst (SignumF dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumF $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{
  match(Set dst (SignumD dst (Binary zero one)));
  effect(KILL cr);
  format %{ "signumD $dst, $dst" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as writemask for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (src)
//
// A B C Result
// 0 0 0 0
// 0 0 1 0
// 0 1 0 1
// 0 1 1 0
// 1 0 0 0
// 1 0 1 1
// 1 1 0 1
// 1 1 1 1
//
// Result going from high bit to low bit is 0b11100100 = 0xe4
// ---------------------------------------
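
// Editor's note: with C fixed at 0x7FFFFFFF, ternary function 0xE4 reduces to
// result = (A & C) | (B & ~C), i.e. magnitude bits from A (dst) and the sign
// bit from B (src). A scalar sketch of the same operation on float bits
// (illustration only):
//
//   uint32_t copysign_bits(uint32_t dst, uint32_t src) {
//     return (dst & 0x7FFFFFFFu) | (src & 0x80000000u);
//   }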

#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src (LoadI mask)));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src (LoadI mask)));
  format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %}
  ins_encode %{
    __ pdepl($dst$$Register, $src$$Register, $mask$$Address);
  %}
  ins_pipe( pipe_slow );
%}
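
// Editor's note: a worked example of the pext/pdep semantics relied on above.
// pext gathers the src bits selected by mask into the low end of dst, and
// pdep scatters the low bits of src to the positions selected by mask:
//   pext(src = 0b11001010, mask = 0b11110000) == 0b1100
//   pdep(src = 0b00000011, mask = 0b00001010) == 0b00001010
// Over the masked positions the two are inverses: pdep(pext(x, m), m) == (x & m).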

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX <= 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() &&
            UseAVX > 1);
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int
// with sign extension before the shift. Char vectors are fine, though,
// since chars are unsigned values.
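
// Editor's note: a concrete instance of the mismatch. For short s = -1, Java
// evaluates s >>> 3 as ((int)s) >>> 3 == 0x1FFFFFFF, which narrows back to
// (short)-1, whereas a 16-bit vector logical shift of 0xFFFF by 3 yields
// 0x1FFF == 8191. For char data the zero-extended int shift and the 16-bit
// shift agree, so URShiftVS remains safe there.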
shift packedS" %} 6518 ins_encode %{ 6519 int opcode = this->ideal_Opcode(); 6520 if (UseAVX > 0) { 6521 int vlen_enc = vector_length_encoding(this); 6522 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6523 } else { 6524 int vlen = Matcher::vector_length(this); 6525 if (vlen == 2) { 6526 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6527 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6528 } else if (vlen == 4) { 6529 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6530 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6531 } else { 6532 assert (vlen == 8, "sanity"); 6533 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6534 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6535 } 6536 } 6537 %} 6538 ins_pipe( pipe_slow ); 6539 %} 6540 6541 // Integers vector left shift 6542 instruct vshiftI(vec dst, vec src, vec shift) %{ 6543 predicate(!n->as_ShiftV()->is_var_shift()); 6544 match(Set dst ( LShiftVI src shift)); 6545 match(Set dst ( RShiftVI src shift)); 6546 match(Set dst (URShiftVI src shift)); 6547 effect(TEMP dst, USE src, USE shift); 6548 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6549 ins_encode %{ 6550 int opcode = this->ideal_Opcode(); 6551 if (UseAVX > 0) { 6552 int vlen_enc = vector_length_encoding(this); 6553 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6554 } else { 6555 int vlen = Matcher::vector_length(this); 6556 if (vlen == 2) { 6557 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6558 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6559 } else { 6560 assert(vlen == 4, "sanity"); 6561 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6562 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6563 } 6564 } 6565 %} 6566 ins_pipe( pipe_slow ); 6567 %} 6568 6569 // Integers vector left constant shift 6570 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6571 match(Set dst (LShiftVI src (LShiftCntV shift))); 6572 match(Set dst (RShiftVI src (RShiftCntV shift))); 6573 match(Set dst (URShiftVI src (RShiftCntV shift))); 6574 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6575 ins_encode %{ 6576 int opcode = this->ideal_Opcode(); 6577 if (UseAVX > 0) { 6578 int vector_len = vector_length_encoding(this); 6579 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6580 } else { 6581 int vlen = Matcher::vector_length(this); 6582 if (vlen == 2) { 6583 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6584 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6585 } else { 6586 assert(vlen == 4, "sanity"); 6587 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6588 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6589 } 6590 } 6591 %} 6592 ins_pipe( pipe_slow ); 6593 %} 6594 6595 // Longs vector shift 6596 instruct vshiftL(vec dst, vec src, vec shift) %{ 6597 predicate(!n->as_ShiftV()->is_var_shift()); 6598 match(Set dst ( LShiftVL src shift)); 6599 match(Set dst (URShiftVL src shift)); 6600 effect(TEMP dst, USE src, USE shift); 6601 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6602 ins_encode %{ 6603 int opcode = this->ideal_Opcode(); 6604 if (UseAVX > 0) { 6605 int vlen_enc = vector_length_encoding(this); 6606 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6607 } else { 6608 assert(Matcher::vector_length(this) == 2, ""); 6609 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6610 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6611 } 6612 %} 6613 ins_pipe( pipe_slow ); 6614 %} 6615 6616 // Longs vector constant shift 6617 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6618 match(Set dst (LShiftVL src (LShiftCntV shift))); 6619 match(Set dst (URShiftVL src (RShiftCntV shift))); 6620 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6621 ins_encode %{ 6622 int opcode = this->ideal_Opcode(); 6623 if (UseAVX > 0) { 6624 int vector_len = vector_length_encoding(this); 6625 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6626 } else { 6627 assert(Matcher::vector_length(this) == 2, ""); 6628 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6629 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6630 } 6631 %} 6632 ins_pipe( pipe_slow ); 6633 %} 6634 6635 // -------------------ArithmeticRightShift ----------------------------------- 6636 // Long vector arithmetic right shift 6637 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6638 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6639 match(Set dst (RShiftVL src shift)); 6640 effect(TEMP dst, TEMP tmp); 6641 format %{ "vshiftq $dst,$src,$shift" %} 6642 ins_encode %{ 6643 uint vlen = Matcher::vector_length(this); 6644 if (vlen == 2) { 6645 assert(UseSSE >= 2, "required"); 6646 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6647 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6648 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6649 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6650 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6651 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6652 } else { 6653 assert(vlen == 4, "sanity"); 6654 assert(UseAVX > 1, "required"); 6655 int vlen_enc = Assembler::AVX_256bit; 6656 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6657 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6658 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6659 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6660 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6661 } 6662 %} 6663 ins_pipe( pipe_slow ); 6664 %} 6665 6666 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6667 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6668 match(Set dst (RShiftVL src shift)); 6669 format %{ "vshiftq $dst,$src,$shift" %} 6670 ins_encode %{ 6671 int vlen_enc = vector_length_encoding(this); 6672 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6673 %} 6674 ins_pipe( pipe_slow ); 6675 %} 6676 6677 // ------------------- Variable Shift ----------------------------- 6678 // Byte variable shift 6679 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6680 predicate(Matcher::vector_length(n) <= 8 && 6681 n->as_ShiftV()->is_var_shift() && 6682 !VM_Version::supports_avx512bw()); 6683 match(Set dst ( LShiftVB src shift)); 6684 match(Set dst ( RShiftVB src shift)); 6685 match(Set dst (URShiftVB src shift)); 

instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Shift -----------------------------
// Byte variable shift
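
// Editor's note: no x86 extension provides per-element shifts for byte lanes
// (AVX2 adds vpsllvd/vpsrlvd/vpsravd for dwords and qwords only, and the word
// forms vpsllvw/vpsrlvw/vpsravw require AVX512BW), so the byte patterns below
// widen to words or dwords, shift, mask the results back into byte range and
// repack.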

instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Short variable shift
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift arithmetic
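
// Editor's note: AVX2 has vpsllvq/vpsrlvq but no variable arithmetic right
// shift for 64-bit lanes; vpsravq is AVX-512 only. Hence the AVX2 pattern
// below emulates RShiftVL with an extra temporary, while the EVEX pattern
// maps directly onto evpsravq.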

instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
or vectors" %} 7001 ins_encode %{ 7002 int vlen_enc = vector_length_encoding(this); 7003 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7004 %} 7005 ins_pipe( pipe_slow ); 7006 %} 7007 7008 instruct vor_mem(vec dst, vec src, memory mem) %{ 7009 predicate((UseAVX > 0) && 7010 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7011 match(Set dst (OrV src (LoadVector mem))); 7012 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7013 ins_encode %{ 7014 int vlen_enc = vector_length_encoding(this); 7015 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7016 %} 7017 ins_pipe( pipe_slow ); 7018 %} 7019 7020 // --------------------------------- XOR -------------------------------------- 7021 7022 instruct vxor(vec dst, vec src) %{ 7023 predicate(UseAVX == 0); 7024 match(Set dst (XorV dst src)); 7025 format %{ "pxor $dst,$src\t! xor vectors" %} 7026 ins_encode %{ 7027 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7028 %} 7029 ins_pipe( pipe_slow ); 7030 %} 7031 7032 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7033 predicate(UseAVX > 0); 7034 match(Set dst (XorV src1 src2)); 7035 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7036 ins_encode %{ 7037 int vlen_enc = vector_length_encoding(this); 7038 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7039 %} 7040 ins_pipe( pipe_slow ); 7041 %} 7042 7043 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7044 predicate((UseAVX > 0) && 7045 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7046 match(Set dst (XorV src (LoadVector mem))); 7047 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7048 ins_encode %{ 7049 int vlen_enc = vector_length_encoding(this); 7050 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7051 %} 7052 ins_pipe( pipe_slow ); 7053 %} 7054 7055 // --------------------------------- VectorCast -------------------------------------- 7056 7057 instruct vcastBtoX(vec dst, vec src) %{ 7058 match(Set dst (VectorCastB2X src)); 7059 format %{ "vector_cast_b2x $dst,$src\t!" %} 7060 ins_encode %{ 7061 assert(UseAVX > 0, "required"); 7062 7063 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7064 int vlen_enc = vector_length_encoding(this); 7065 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7066 %} 7067 ins_pipe( pipe_slow ); 7068 %} 7069 7070 instruct castStoX(vec dst, vec src) %{ 7071 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7072 Matcher::vector_length(n->in(1)) <= 8 && // src 7073 Matcher::vector_element_basic_type(n) == T_BYTE); 7074 match(Set dst (VectorCastS2X src)); 7075 format %{ "vector_cast_s2x $dst,$src" %} 7076 ins_encode %{ 7077 assert(UseAVX > 0, "required"); 7078 7079 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7080 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7081 %} 7082 ins_pipe( pipe_slow ); 7083 %} 7084 7085 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7086 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7087 Matcher::vector_length(n->in(1)) == 16 && // src 7088 Matcher::vector_element_basic_type(n) == T_BYTE); 7089 effect(TEMP dst, TEMP vtmp); 7090 match(Set dst (VectorCastS2X src)); 7091 format %{ "vector_cast_s2x $dst,$src\t! 

instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this, $src);
    int vlen_enc = vector_length_encoding(this, $src);
    switch (to_elem_bt) {
      case T_BYTE:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_SHORT:
        if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_INT:
        if (vlen == 8) {
          if ($dst$$XMMRegister != $src$$XMMRegister) {
            __ movflt($dst$$XMMRegister, $src$$XMMRegister);
          }
        } else if (vlen == 16) {
          __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8);
        } else if (vlen == 32) {
          if (UseAVX > 2) {
            if (!VM_Version::supports_avx512vl()) {
              vlen_enc = Assembler::AVX_512bit;
            }
            __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
          } else {
            __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc);
            __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
          }
        } else { // vlen == 64
          __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        }
        break;
      case T_FLOAT:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE:
        assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required");
        __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;

      default: assert(false, "%s", type2name(to_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastFtoD_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastF2X src));
  format %{ "vector_cast_f2d $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
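
// Editor's note: the float-to-integral casts below are more than a bare
// cvttps2dq. On x86 a NaN or out-of-range source converts to the "integer
// indefinite" value (e.g. 0x80000000), while Java requires (int)NaN == 0 and
// saturation to MIN_VALUE/MAX_VALUE. The helpers detect such lanes via the
// signflip constant and patch them up, which is what the extra xtmp/ktmp
// temporaries and the KILL of the flags register are for.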

instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    // JDK-8292878 removed the need for an explicit scratch register when loading
    // addresses wider than 32 bits in register-indirect addressing mode: stub
    // constants live in the code cache, and ReservedCodeCacheSize is currently
    // capped at 2G. Targets are free to raise that limit, but a code cache larger
    // than 2G is unrealistic in practice; on the upside, with the cap in place we
    // save a temporary register allocation, which in the limiting case can
    // prevent spilling in blocks with high register pressure.
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip())
                                                              : ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}
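
// Editor's note: the RoundV* patterns below temporarily install a private
// MXCSR. 0x3F80 is the default control word (all exceptions masked) with
// RC = 01, i.e. round toward negative infinity, so Math.round can be computed
// as floor(x + 0.5). The 0x3FBF variant chosen under EnableX86ECoreOpts
// additionally has the six sticky exception-flag bits pre-set, presumably to
// keep the ldmxcsr round trip from repeatedly dirtying them on E-cores.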
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

// --------------------------------- VectorMaskCmp --------------------------------------
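
// Three lowering shapes exist for VectorMaskCmp:
//   - vcmp*: the result lives in an ordinary vector register, with each lane set to
//     all-ones or all-zero (pre-AVX-512 targets, or consumers that want a vector);
//   - evcmp*: the result is a kReg opmask, used when n->bottom_type() is a vectmask;
//   - evcmpFD64/vcmp64: 512-bit inputs whose consumer still expects a vector result
//     compare into a temporary kReg first and then expand it to all-ones lanes with
//     a masked move from vector_all_bits_set().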
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
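
// Pre-AVX-512 SSE/AVX only provide integer lane compares for equality (pcmpeq*) and
// signed greater-than (pcmpgt*). vcmp_direct covers the predicates that map straight
// onto those (eq, plus lt/gt via operand swap inside vpcmpCCW); vcmp_negate derives
// ne/le/ge by computing the opposite predicate and inverting the result, using
// $xtmp for the inversion.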
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
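
// Unsigned comparison reduces to signed comparison once both operands are biased by
// the sign bit:  a <u b  <==>  (a ^ SIGN_BIT) <s (b ^ SIGN_BIT). high_bit_set()
// supplies the per-element sign-bit pattern that is XORed into both sources below
// before the signed vpcmpCCW.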
using $xtmp as TEMP" %} 7607 ins_encode %{ 7608 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7609 int vlen_enc = vector_length_encoding(this, $src1); 7610 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7611 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7612 7613 if (vlen_enc == Assembler::AVX_128bit) { 7614 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7615 } else { 7616 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7617 } 7618 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7619 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7620 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7621 %} 7622 ins_pipe( pipe_slow ); 7623 %} 7624 7625 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7626 predicate((n->bottom_type()->isa_vectmask() == nullptr && 7627 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7628 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7629 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7630 effect(TEMP ktmp); 7631 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7632 ins_encode %{ 7633 assert(UseAVX > 2, "required"); 7634 7635 int vlen_enc = vector_length_encoding(this, $src1); 7636 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7637 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 7638 KRegister mask = k0; // The comparison itself is not being masked. 7639 bool merge = false; 7640 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7641 7642 switch (src1_elem_bt) { 7643 case T_INT: { 7644 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7645 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7646 break; 7647 } 7648 case T_LONG: { 7649 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7650 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7651 break; 7652 } 7653 default: assert(false, "%s", type2name(src1_elem_bt)); 7654 } 7655 %} 7656 ins_pipe( pipe_slow ); 7657 %} 7658 7659 7660 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7661 predicate(n->bottom_type()->isa_vectmask() && 7662 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7663 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7664 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 


instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare into the destination mask register, selecting the compare width
    // from the source element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
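
// For 256/512-bit sources the element is reached in two steps: get_lane first copies
// the 128-bit lane holding the element into the vector temp, then get_elem pulls the
// scalar out of that lane using the within-lane part of $idx.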
using $vtmp as TEMP" %} 7757 ins_encode %{ 7758 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7759 7760 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7761 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7762 %} 7763 ins_pipe( pipe_slow ); 7764 %} 7765 #endif 7766 7767 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7768 predicate(Matcher::vector_length(n->in(1)) <= 4); 7769 match(Set dst (ExtractF src idx)); 7770 effect(TEMP dst, TEMP vtmp); 7771 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7772 ins_encode %{ 7773 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7774 7775 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 7776 %} 7777 ins_pipe( pipe_slow ); 7778 %} 7779 7780 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7781 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7782 Matcher::vector_length(n->in(1)/*src*/) == 16); 7783 match(Set dst (ExtractF src idx)); 7784 effect(TEMP vtmp); 7785 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7786 ins_encode %{ 7787 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7788 7789 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7790 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 7791 %} 7792 ins_pipe( pipe_slow ); 7793 %} 7794 7795 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7796 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7797 match(Set dst (ExtractD src idx)); 7798 format %{ "extractD $dst,$src,$idx\t!" %} 7799 ins_encode %{ 7800 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7801 7802 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7803 %} 7804 ins_pipe( pipe_slow ); 7805 %} 7806 7807 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7808 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7809 Matcher::vector_length(n->in(1)) == 8); // src 7810 match(Set dst (ExtractD src idx)); 7811 effect(TEMP vtmp); 7812 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7813 ins_encode %{ 7814 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7815 7816 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7817 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7818 %} 7819 ins_pipe( pipe_slow ); 7820 %} 7821 7822 // --------------------------------- Vector Blend -------------------------------------- 7823 7824 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7825 predicate(UseAVX == 0); 7826 match(Set dst (VectorBlend (Binary dst src) mask)); 7827 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7828 effect(TEMP tmp); 7829 ins_encode %{ 7830 assert(UseSSE >= 4, "required"); 7831 7832 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7833 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7834 } 7835 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7836 %} 7837 ins_pipe( pipe_slow ); 7838 %} 7839 7840 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7841 predicate(UseAVX > 0 && 7842 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 7843 Matcher::vector_length_in_bytes(n) <= 32 && 7844 is_integral_type(Matcher::vector_element_basic_type(n))); 7845 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7846 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7847 ins_encode %{ 7848 int vlen_enc = vector_length_encoding(this); 7849 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7850 %} 7851 ins_pipe( pipe_slow ); 7852 %} 7853 7854 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7855 predicate(UseAVX > 0 && 7856 n->in(2)->bottom_type()->isa_vectmask() == nullptr && 7857 Matcher::vector_length_in_bytes(n) <= 32 && 7858 !is_integral_type(Matcher::vector_element_basic_type(n))); 7859 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7860 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7861 ins_encode %{ 7862 int vlen_enc = vector_length_encoding(this); 7863 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7864 %} 7865 ins_pipe( pipe_slow ); 7866 %} 7867 7868 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 7869 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7870 n->in(2)->bottom_type()->isa_vectmask() == nullptr); 7871 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7872 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 7873 effect(TEMP ktmp); 7874 ins_encode %{ 7875 int vlen_enc = Assembler::AVX_512bit; 7876 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7877 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 7878 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7879 %} 7880 ins_pipe( pipe_slow ); 7881 %} 7882 7883 7884 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 7885 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7886 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7887 VM_Version::supports_avx512bw())); 7888 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7889 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using k2 as TEMP" %} 7890 ins_encode %{ 7891 int vlen_enc = vector_length_encoding(this); 7892 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7893 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7894 %} 7895 ins_pipe( pipe_slow ); 7896 %} 7897 7898 // --------------------------------- ABS -------------------------------------- 7899 // a = |a| 7900 instruct vabsB_reg(vec dst, vec src) %{ 7901 match(Set dst (AbsVB src)); 7902 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7903 ins_encode %{ 7904 uint vlen = Matcher::vector_length(this); 7905 if (vlen <= 16) { 7906 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7907 } else { 7908 int vlen_enc = vector_length_encoding(this); 7909 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7910 } 7911 %} 7912 ins_pipe( pipe_slow ); 7913 %} 7914 7915 instruct vabsS_reg(vec dst, vec src) %{ 7916 match(Set dst (AbsVS src)); 7917 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7918 ins_encode %{ 7919 uint vlen = Matcher::vector_length(this); 7920 if (vlen <= 8) { 7921 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7922 } else { 7923 int vlen_enc = vector_length_encoding(this); 7924 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7925 } 7926 %} 7927 ins_pipe( pipe_slow ); 7928 %} 7929 7930 instruct vabsI_reg(vec dst, vec src) %{ 7931 match(Set dst (AbsVI src)); 7932 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7933 ins_encode %{ 7934 uint vlen = Matcher::vector_length(this); 7935 if (vlen <= 4) { 7936 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7937 } else { 7938 int vlen_enc = vector_length_encoding(this); 7939 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7940 } 7941 %} 7942 ins_pipe( pipe_slow ); 7943 %} 7944 7945 instruct vabsL_reg(vec dst, vec src) %{ 7946 match(Set dst (AbsVL src)); 7947 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7948 ins_encode %{ 7949 assert(UseAVX > 2, "required"); 7950 int vlen_enc = vector_length_encoding(this); 7951 if (!VM_Version::supports_avx512vl()) { 7952 vlen_enc = Assembler::AVX_512bit; 7953 } 7954 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7955 %} 7956 ins_pipe( pipe_slow ); 7957 %} 7958 7959 // --------------------------------- ABSNEG -------------------------------------- 7960 7961 instruct vabsnegF(vec dst, vec src) %{ 7962 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7963 match(Set dst (AbsVF src)); 7964 match(Set dst (NegVF src)); 7965 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7966 ins_cost(150); 7967 ins_encode %{ 7968 int opcode = this->ideal_Opcode(); 7969 int vlen = Matcher::vector_length(this); 7970 if (vlen == 2) { 7971 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 7972 } else { 7973 assert(vlen == 8 || vlen == 16, "required"); 7974 int vlen_enc = vector_length_encoding(this); 7975 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7976 } 7977 %} 7978 ins_pipe( pipe_slow ); 7979 %} 7980 7981 instruct vabsneg4F(vec dst) %{ 7982 predicate(Matcher::vector_length(n) == 4); 7983 match(Set dst (AbsVF dst)); 7984 match(Set dst (NegVF dst)); 7985 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 7986 ins_cost(150); 7987 ins_encode %{ 7988 int opcode = this->ideal_Opcode(); 7989 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 7990 %} 7991 ins_pipe( pipe_slow ); 7992 %} 7993 7994 instruct vabsnegD(vec dst, vec src) %{ 7995 
instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

//------------------------------------- VectorTest --------------------------------------------

#ifdef _LP64
instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16);
  match(Set cr (VectorTest src1 src2));
  effect(TEMP vtmp);
  format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16);
  match(Set cr (VectorTest src1 src2));
  format %{ "vptest_ge16 $src1, $src2\n\t" %}
  ins_encode %{
    BasicType bt = Matcher::vector_element_basic_type(this, $src1);
    int vlen = Matcher::vector_length_in_bytes(this, $src1);
    __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen);
  %}
  ins_pipe( pipe_slow );
%}
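
// kortest/ktest operate on at least 8 mask bits (and the 8-bit forms require
// AVX512DQ), so for shorter masks the bits are moved to a GPR, truncated to masklen
// bits, and the condition flags are produced with plain integer and/cmp.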
using $tmp as TEMP" %} 8063 ins_encode %{ 8064 uint masklen = Matcher::vector_length(this, $src1); 8065 __ kmovwl($tmp$$Register, $src1$$KRegister); 8066 __ andl($tmp$$Register, (1 << masklen) - 1); 8067 %} 8068 ins_pipe( pipe_slow ); 8069 %} 8070 8071 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8072 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8073 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8074 match(Set cr (VectorTest src1 src2)); 8075 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8076 ins_encode %{ 8077 uint masklen = Matcher::vector_length(this, $src1); 8078 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8079 %} 8080 ins_pipe( pipe_slow ); 8081 %} 8082 #endif 8083 8084 //------------------------------------- LoadMask -------------------------------------------- 8085 8086 instruct loadMask(legVec dst, legVec src) %{ 8087 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8088 match(Set dst (VectorLoadMask src)); 8089 effect(TEMP dst); 8090 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8091 ins_encode %{ 8092 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8093 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8094 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8095 %} 8096 ins_pipe( pipe_slow ); 8097 %} 8098 8099 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8100 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8101 match(Set dst (VectorLoadMask src)); 8102 effect(TEMP xtmp); 8103 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8104 ins_encode %{ 8105 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8106 true, Assembler::AVX_512bit); 8107 %} 8108 ins_pipe( pipe_slow ); 8109 %} 8110 8111 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8112 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8113 match(Set dst (VectorLoadMask src)); 8114 effect(TEMP xtmp); 8115 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8116 ins_encode %{ 8117 int vlen_enc = vector_length_encoding(in(1)); 8118 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8119 false, vlen_enc); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 //------------------------------------- StoreMask -------------------------------------------- 8125 8126 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8127 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8128 match(Set dst (VectorStoreMask src size)); 8129 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8130 ins_encode %{ 8131 int vlen = Matcher::vector_length(this); 8132 if (vlen <= 16 && UseAVX <= 2) { 8133 assert(UseSSE >= 3, "required"); 8134 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8135 } else { 8136 assert(UseAVX > 0, "required"); 8137 int src_vlen_enc = vector_length_encoding(this, $src); 8138 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8139 } 8140 %} 8141 ins_pipe( pipe_slow ); 8142 %} 8143 8144 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8145 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8146 match(Set dst (VectorStoreMask src size)); 8147 effect(TEMP_DEF dst, TEMP xtmp); 8148 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8149 ins_encode %{ 8150 int vlen_enc = Assembler::AVX_128bit; 8151 int vlen = Matcher::vector_length(this); 8152 if (vlen <= 8) { 8153 assert(UseSSE >= 3, "required"); 8154 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8155 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8156 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8157 } else { 8158 assert(UseAVX > 0, "required"); 8159 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8160 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8161 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8162 } 8163 %} 8164 ins_pipe( pipe_slow ); 8165 %} 8166 8167 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8168 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8169 match(Set dst (VectorStoreMask src size)); 8170 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8171 effect(TEMP_DEF dst, TEMP xtmp); 8172 ins_encode %{ 8173 int vlen_enc = Assembler::AVX_128bit; 8174 int vlen = Matcher::vector_length(this); 8175 if (vlen <= 4) { 8176 assert(UseSSE >= 3, "required"); 8177 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8178 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8179 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8180 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8181 } else { 8182 assert(UseAVX > 0, "required"); 8183 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8184 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8185 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8186 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8187 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8188 } 8189 %} 8190 ins_pipe( pipe_slow ); 8191 %} 8192 8193 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8194 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8195 match(Set dst (VectorStoreMask src size)); 8196 effect(TEMP_DEF dst, TEMP xtmp); 8197 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8198 ins_encode %{ 8199 assert(UseSSE >= 3, "required"); 8200 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8201 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8202 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8203 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8204 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8205 %} 8206 ins_pipe( pipe_slow ); 8207 %} 8208 8209 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8210 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8211 match(Set dst (VectorStoreMask src size)); 8212 format %{ "vector_store_mask $dst, $src \t! 

instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorStoreMask mask size));
  effect(TEMP_DEF dst);
  format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
  ins_encode %{
    assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
    __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
                 false, Assembler::AVX_512bit, noreg);
    __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
elem size is $size byte[s]" %} 8278 ins_encode %{ 8279 int dst_vlen_enc = vector_length_encoding(this); 8280 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8281 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8282 %} 8283 ins_pipe( pipe_slow ); 8284 %} 8285 8286 instruct vmaskcast_evex(kReg dst) %{ 8287 match(Set dst (VectorMaskCast dst)); 8288 ins_cost(0); 8289 format %{ "vector_mask_cast $dst" %} 8290 ins_encode %{ 8291 // empty 8292 %} 8293 ins_pipe(empty); 8294 %} 8295 8296 instruct vmaskcast(vec dst) %{ 8297 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8298 match(Set dst (VectorMaskCast dst)); 8299 ins_cost(0); 8300 format %{ "vector_mask_cast $dst" %} 8301 ins_encode %{ 8302 // empty 8303 %} 8304 ins_pipe(empty); 8305 %} 8306 8307 instruct vmaskcast_avx(vec dst, vec src) %{ 8308 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8309 match(Set dst (VectorMaskCast src)); 8310 format %{ "vector_mask_cast $dst, $src" %} 8311 ins_encode %{ 8312 int vlen = Matcher::vector_length(this); 8313 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8314 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8315 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8316 %} 8317 ins_pipe(pipe_slow); 8318 %} 8319 8320 //-------------------------------- Load Iota Indices ---------------------------------- 8321 8322 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8323 match(Set dst (VectorLoadConst src)); 8324 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8325 ins_encode %{ 8326 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8327 BasicType bt = Matcher::vector_element_basic_type(this); 8328 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8329 %} 8330 ins_pipe( pipe_slow ); 8331 %} 8332 8333 #ifdef _LP64 8334 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8335 match(Set dst (PopulateIndex src1 src2)); 8336 effect(TEMP dst, TEMP vtmp); 8337 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8338 ins_encode %{ 8339 assert($src2$$constant == 1, "required"); 8340 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8341 int vlen_enc = vector_length_encoding(this); 8342 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8343 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8344 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8345 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8346 %} 8347 ins_pipe( pipe_slow ); 8348 %} 8349 8350 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8351 match(Set dst (PopulateIndex src1 src2)); 8352 effect(TEMP dst, TEMP vtmp); 8353 format %{ "vector_populate_index $dst $src1 $src2\t! 
using $vtmp as TEMP" %} 8354 ins_encode %{ 8355 assert($src2$$constant == 1, "required"); 8356 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8357 int vlen_enc = vector_length_encoding(this); 8358 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8359 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8360 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8361 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8362 %} 8363 ins_pipe( pipe_slow ); 8364 %} 8365 #endif 8366 //-------------------------------- Rearrange ---------------------------------- 8367 8368 // LoadShuffle/Rearrange for Byte 8369 8370 instruct loadShuffleB(vec dst) %{ 8371 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8372 match(Set dst (VectorLoadShuffle dst)); 8373 format %{ "vector_load_shuffle $dst, $dst" %} 8374 ins_encode %{ 8375 // empty 8376 %} 8377 ins_pipe( pipe_slow ); 8378 %} 8379 8380 instruct rearrangeB(vec dst, vec shuffle) %{ 8381 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8382 Matcher::vector_length(n) < 32); 8383 match(Set dst (VectorRearrange dst shuffle)); 8384 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8385 ins_encode %{ 8386 assert(UseSSE >= 4, "required"); 8387 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8388 %} 8389 ins_pipe( pipe_slow ); 8390 %} 8391 8392 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8393 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8394 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8395 match(Set dst (VectorRearrange src shuffle)); 8396 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8397 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8398 ins_encode %{ 8399 assert(UseAVX >= 2, "required"); 8400 // Swap src into vtmp1 8401 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8402 // Shuffle swapped src to get entries from other 128 bit lane 8403 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8404 // Shuffle original src to get entries from self 128 bit lane 8405 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8406 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8407 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8408 // Perform the blend 8409 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8410 %} 8411 ins_pipe( pipe_slow ); 8412 %} 8413 8414 8415 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8416 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8417 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8418 match(Set dst (VectorRearrange src shuffle)); 8419 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8420 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8421 ins_encode %{ 8422 int vlen_enc = vector_length_encoding(this); 8423 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8424 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8425 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8426 %} 8427 ins_pipe( pipe_slow ); 8428 %} 8429 8430 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8431 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8432 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8433 match(Set dst (VectorRearrange src shuffle)); 8434 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8435 ins_encode %{ 8436 int vlen_enc = vector_length_encoding(this); 8437 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8438 %} 8439 ins_pipe( pipe_slow ); 8440 %} 8441 8442 // LoadShuffle/Rearrange for Short 8443 8444 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8445 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8446 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8447 match(Set dst (VectorLoadShuffle src)); 8448 effect(TEMP dst, TEMP vtmp); 8449 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8450 ins_encode %{ 8451 // Create a byte shuffle mask from short shuffle mask 8452 // only byte shuffle instruction available on these platforms 8453 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8454 if (UseAVX == 0) { 8455 assert(vlen_in_bytes <= 16, "required"); 8456 // Multiply each shuffle by two to get byte index 8457 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8458 __ psllw($vtmp$$XMMRegister, 1); 8459 8460 // Duplicate to create 2 copies of byte index 8461 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8462 __ psllw($dst$$XMMRegister, 8); 8463 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8464 8465 // Add one to get alternate byte index 8466 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8467 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8468 } else { 8469 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8470 int vlen_enc = vector_length_encoding(this); 8471 // Multiply each shuffle by two to get byte index 8472 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8473 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8474 8475 // Duplicate to create 2 copies of byte index 8476 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8477 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8478 8479 // Add one to get alternate byte index 8480 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8481 } 8482 %} 8483 ins_pipe( pipe_slow ); 8484 %} 8485 8486 instruct rearrangeS(vec dst, vec shuffle) %{ 8487 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8488 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8489 match(Set dst (VectorRearrange dst shuffle)); 8490 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8491 ins_encode %{ 8492 assert(UseSSE >= 4, "required"); 8493 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8494 %} 8495 ins_pipe( pipe_slow ); 8496 %} 8497 8498 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8499 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8500 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8501 match(Set dst (VectorRearrange src shuffle)); 8502 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8503 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8504 ins_encode %{ 8505 assert(UseAVX >= 2, "required"); 8506 // Swap src into vtmp1 8507 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8508 // Shuffle swapped src to get entries from other 128 bit lane 8509 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8510 // Shuffle original src to get entries from self 128 bit lane 8511 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8512 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8513 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8514 // Perform the blend 8515 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8516 %} 8517 ins_pipe( pipe_slow ); 8518 %} 8519 8520 instruct loadShuffleS_evex(vec dst, vec src) %{ 8521 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8522 VM_Version::supports_avx512bw()); 8523 match(Set dst (VectorLoadShuffle src)); 8524 format %{ "vector_load_shuffle $dst, $src" %} 8525 ins_encode %{ 8526 int vlen_enc = vector_length_encoding(this); 8527 if (!VM_Version::supports_avx512vl()) { 8528 vlen_enc = Assembler::AVX_512bit; 8529 } 8530 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8531 %} 8532 ins_pipe( pipe_slow ); 8533 %} 8534 8535 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8536 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8537 VM_Version::supports_avx512bw()); 8538 match(Set dst (VectorRearrange src shuffle)); 8539 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8540 ins_encode %{ 8541 int vlen_enc = vector_length_encoding(this); 8542 if (!VM_Version::supports_avx512vl()) { 8543 vlen_enc = Assembler::AVX_512bit; 8544 } 8545 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8546 %} 8547 ins_pipe( pipe_slow ); 8548 %} 8549 8550 // LoadShuffle/Rearrange for Integer and Float 8551 8552 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8553 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8554 Matcher::vector_length(n) == 4 && UseAVX == 0); 8555 match(Set dst (VectorLoadShuffle src)); 8556 effect(TEMP dst, TEMP vtmp); 8557 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8558 ins_encode %{ 8559 assert(UseSSE >= 4, "required"); 8560 8561 // Create a byte shuffle mask from int shuffle mask 8562 // only byte shuffle instruction available on these platforms 8563 8564 // Duplicate and multiply each shuffle by 4 8565 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8566 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8567 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8568 __ psllw($vtmp$$XMMRegister, 2); 8569 8570 // Duplicate again to create 4 copies of byte index 8571 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8572 __ psllw($dst$$XMMRegister, 8); 8573 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8574 8575 // Add 3,2,1,0 to get alternate byte index 8576 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8577 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8578 %} 8579 ins_pipe( pipe_slow ); 8580 %} 8581 8582 instruct rearrangeI(vec dst, vec shuffle) %{ 8583 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8584 UseAVX == 0); 8585 match(Set dst (VectorRearrange dst shuffle)); 8586 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8587 ins_encode %{ 8588 assert(UseSSE >= 4, "required"); 8589 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8590 %} 8591 ins_pipe( pipe_slow ); 8592 %} 8593 8594 instruct loadShuffleI_avx(vec dst, vec src) %{ 8595 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8596 UseAVX > 0); 8597 match(Set dst (VectorLoadShuffle src)); 8598 format %{ "vector_load_shuffle $dst, $src" %} 8599 ins_encode %{ 8600 int vlen_enc = vector_length_encoding(this); 8601 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8602 %} 8603 ins_pipe( pipe_slow ); 8604 %} 8605 8606 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8607 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8608 UseAVX > 0); 8609 match(Set dst (VectorRearrange src shuffle)); 8610 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8611 ins_encode %{ 8612 int vlen_enc = vector_length_encoding(this); 8613 BasicType bt = Matcher::vector_element_basic_type(this); 8614 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8615 %} 8616 ins_pipe( pipe_slow ); 8617 %} 8618 8619 // LoadShuffle/Rearrange for Long and Double 8620 8621 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8622 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8623 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8624 match(Set dst (VectorLoadShuffle src)); 8625 effect(TEMP dst, TEMP vtmp); 8626 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8627 ins_encode %{ 8628 assert(UseAVX >= 2, "required"); 8629 8630 int vlen_enc = vector_length_encoding(this); 8631 // Create a double word shuffle mask from long shuffle mask 8632 // only double word shuffle instruction available on these platforms 8633 8634 // Multiply each shuffle by two to get double word index 8635 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8636 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8637 8638 // Duplicate each double word shuffle 8639 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8640 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8641 8642 // Add one to get alternate double word index 8643 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8644 %} 8645 ins_pipe( pipe_slow ); 8646 %} 8647 8648 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8649 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8650 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8651 match(Set dst (VectorRearrange src shuffle)); 8652 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8653 ins_encode %{ 8654 assert(UseAVX >= 2, "required"); 8655 8656 int vlen_enc = vector_length_encoding(this); 8657 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8658 %} 8659 ins_pipe( pipe_slow ); 8660 %} 8661 8662 instruct loadShuffleL_evex(vec dst, vec src) %{ 8663 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8664 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8665 match(Set dst (VectorLoadShuffle src)); 8666 format %{ "vector_load_shuffle $dst, $src" %} 8667 ins_encode %{ 8668 assert(UseAVX > 2, "required"); 8669 8670 int vlen_enc = vector_length_encoding(this); 8671 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8672 %} 8673 ins_pipe( pipe_slow ); 8674 %} 8675 8676 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8677 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8678 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8679 match(Set dst (VectorRearrange src shuffle)); 8680 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8681 ins_encode %{ 8682 assert(UseAVX > 2, "required"); 8683 8684 int vlen_enc = vector_length_encoding(this); 8685 if (vlen_enc == Assembler::AVX_128bit) { 8686 vlen_enc = Assembler::AVX_256bit; 8687 } 8688 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8689 %} 8690 ins_pipe( pipe_slow ); 8691 %} 8692 8693 // --------------------------------- FMA -------------------------------------- 8694 // a * b + c 8695 8696 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8697 match(Set c (FmaVF c (Binary a b))); 8698 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8699 ins_cost(150); 8700 ins_encode %{ 8701 assert(UseFMA, "not enabled"); 8702 int vlen_enc = vector_length_encoding(this); 8703 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8704 %} 8705 ins_pipe( pipe_slow ); 8706 %} 8707 8708 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8709 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8710 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8711 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8712 ins_cost(150); 8713 ins_encode %{ 8714 assert(UseFMA, "not 
enabled"); 8715 int vlen_enc = vector_length_encoding(this); 8716 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8717 %} 8718 ins_pipe( pipe_slow ); 8719 %} 8720 8721 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8722 match(Set c (FmaVD c (Binary a b))); 8723 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8724 ins_cost(150); 8725 ins_encode %{ 8726 assert(UseFMA, "not enabled"); 8727 int vlen_enc = vector_length_encoding(this); 8728 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8729 %} 8730 ins_pipe( pipe_slow ); 8731 %} 8732 8733 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8734 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8735 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8736 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8737 ins_cost(150); 8738 ins_encode %{ 8739 assert(UseFMA, "not enabled"); 8740 int vlen_enc = vector_length_encoding(this); 8741 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8742 %} 8743 ins_pipe( pipe_slow ); 8744 %} 8745 8746 // --------------------------------- Vector Multiply Add -------------------------------------- 8747 8748 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8749 predicate(UseAVX == 0); 8750 match(Set dst (MulAddVS2VI dst src1)); 8751 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8752 ins_encode %{ 8753 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8754 %} 8755 ins_pipe( pipe_slow ); 8756 %} 8757 8758 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8759 predicate(UseAVX > 0); 8760 match(Set dst (MulAddVS2VI src1 src2)); 8761 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8762 ins_encode %{ 8763 int vlen_enc = vector_length_encoding(this); 8764 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8765 %} 8766 ins_pipe( pipe_slow ); 8767 %} 8768 8769 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8770 8771 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8772 predicate(VM_Version::supports_avx512_vnni()); 8773 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8774 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 8775 ins_encode %{ 8776 assert(UseAVX > 2, "required"); 8777 int vlen_enc = vector_length_encoding(this); 8778 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8779 %} 8780 ins_pipe( pipe_slow ); 8781 ins_cost(10); 8782 %} 8783 8784 // --------------------------------- PopCount -------------------------------------- 8785 8786 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 8787 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8788 match(Set dst (PopCountVI src)); 8789 match(Set dst (PopCountVL src)); 8790 format %{ "vector_popcount_integral $dst, $src" %} 8791 ins_encode %{ 8792 int opcode = this->ideal_Opcode(); 8793 int vlen_enc = vector_length_encoding(this, $src); 8794 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8795 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 8796 %} 8797 ins_pipe( pipe_slow ); 8798 %} 8799 8800 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 8801 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8802 match(Set dst (PopCountVI src mask)); 8803 match(Set dst (PopCountVL src mask)); 8804 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 8805 ins_encode %{ 8806 int vlen_enc = vector_length_encoding(this, $src); 8807 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8808 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8809 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 8810 %} 8811 ins_pipe( pipe_slow ); 8812 %} 8813 8814 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 8815 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8816 match(Set dst (PopCountVI src)); 8817 match(Set dst (PopCountVL src)); 8818 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 8819 format %{ "vector_popcount_integral $dst, $src\t! 

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
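
// Note: without the AVX512 VPOPCNT extensions, the fallback above is expected
// to use the classic pshufb nibble trick: split each byte into two 4-bit
// halves, look their bit counts up in a 16-entry in-register table, and add
// the partial counts (widening them for int/long lanes); the xtmp registers
// hold the table and intermediates.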

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
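
// Note: x86 has no direct vector trailing-zeros-count instruction, so these
// helpers presumably derive it from other primitives, e.g. via the identity
// tzcnt(x) = popcount(~x & (x - 1)), which also yields the lane width for
// x == 0 as required.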
vector masked copy" %} 8953 ins_encode %{ 8954 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8955 int vlen_enc = vector_length_encoding(this); 8956 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 8957 %} 8958 ins_pipe( pipe_slow ); 8959 %} 8960 8961 8962 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 8963 predicate(n->in(3)->bottom_type()->isa_vectmask()); 8964 match(Set dst (LoadVectorMasked mem mask)); 8965 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8966 ins_encode %{ 8967 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8968 int vector_len = vector_length_encoding(this); 8969 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 8970 %} 8971 ins_pipe( pipe_slow ); 8972 %} 8973 8974 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 8975 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8976 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8977 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8978 ins_encode %{ 8979 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8980 int vlen_enc = vector_length_encoding(src_node); 8981 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8982 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 8983 %} 8984 ins_pipe( pipe_slow ); 8985 %} 8986 8987 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 8988 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 8989 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8990 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8991 ins_encode %{ 8992 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8993 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8994 int vlen_enc = vector_length_encoding(src_node); 8995 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 8996 %} 8997 ins_pipe( pipe_slow ); 8998 %} 8999 9000 #ifdef _LP64 9001 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9002 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9003 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9004 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9005 ins_encode %{ 9006 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9007 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9008 9009 Label DONE; 9010 int vlen_enc = vector_length_encoding(this, $src1); 9011 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9012 9013 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9014 __ mov64($dst$$Register, -1L); 9015 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9016 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9017 __ jccb(Assembler::carrySet, DONE); 9018 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9019 __ notq($dst$$Register); 9020 __ tzcntq($dst$$Register, $dst$$Register); 9021 __ bind(DONE); 9022 %} 9023 ins_pipe( pipe_slow ); 9024 %} 9025 9026 9027 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9028 match(Set dst (VectorMaskGen len)); 9029 effect(TEMP temp, KILL cr); 9030 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9031 ins_encode %{ 9032 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9033 %} 9034 ins_pipe( pipe_slow ); 9035 %} 9036 9037 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9038 match(Set dst (VectorMaskGen len)); 9039 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9040 effect(TEMP temp); 9041 ins_encode %{ 9042 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9043 __ kmovql($dst$$KRegister, $temp$$Register); 9044 %} 9045 ins_pipe( pipe_slow ); 9046 %} 9047 9048 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9049 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9050 match(Set dst (VectorMaskToLong mask)); 9051 effect(TEMP dst, KILL cr); 9052 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9053 ins_encode %{ 9054 int opcode = this->ideal_Opcode(); 9055 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9056 int mask_len = Matcher::vector_length(this, $mask); 9057 int mask_size = mask_len * type2aelembytes(mbt); 9058 int vlen_enc = vector_length_encoding(this, $mask); 9059 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9060 $dst$$Register, mask_len, mask_size, vlen_enc); 9061 %} 9062 ins_pipe( pipe_slow ); 9063 %} 9064 9065 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9066 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9067 match(Set dst (VectorMaskToLong mask)); 9068 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9069 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9070 ins_encode %{ 9071 int opcode = this->ideal_Opcode(); 9072 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9073 int mask_len = Matcher::vector_length(this, $mask); 9074 int vlen_enc = vector_length_encoding(this, $mask); 9075 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9076 $dst$$Register, mask_len, mbt, vlen_enc); 9077 %} 9078 ins_pipe( pipe_slow ); 9079 %} 9080 9081 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9082 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9083 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9084 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9085 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9086 ins_encode %{ 9087 int opcode = this->ideal_Opcode(); 9088 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9089 int mask_len = Matcher::vector_length(this, $mask); 9090 int vlen_enc = vector_length_encoding(this, $mask); 9091 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9092 $dst$$Register, mask_len, mbt, vlen_enc); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096 9097 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9098 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9099 match(Set dst (VectorMaskTrueCount mask)); 9100 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9101 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9102 ins_encode %{ 9103 int opcode = this->ideal_Opcode(); 9104 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9105 int mask_len = Matcher::vector_length(this, $mask); 9106 int mask_size = mask_len * type2aelembytes(mbt); 9107 int vlen_enc = vector_length_encoding(this, $mask); 9108 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9109 $tmp$$Register, mask_len, mask_size, vlen_enc); 9110 %} 9111 ins_pipe( pipe_slow ); 9112 %} 9113 9114 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9115 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9116 match(Set dst (VectorMaskTrueCount mask)); 9117 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9118 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9119 ins_encode %{ 9120 int opcode = this->ideal_Opcode(); 9121 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9122 int mask_len = Matcher::vector_length(this, $mask); 9123 int vlen_enc = vector_length_encoding(this, $mask); 9124 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9125 $tmp$$Register, mask_len, mbt, vlen_enc); 9126 %} 9127 ins_pipe( pipe_slow ); 9128 %} 9129 9130 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9131 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9132 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9133 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9134 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9135 ins_encode %{ 9136 int opcode = this->ideal_Opcode(); 9137 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9138 int mask_len = Matcher::vector_length(this, $mask); 9139 int vlen_enc = vector_length_encoding(this, $mask); 9140 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9141 $tmp$$Register, mask_len, mbt, vlen_enc); 9142 %} 9143 ins_pipe( pipe_slow ); 9144 %} 9145 9146 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9147 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9148 match(Set dst (VectorMaskFirstTrue mask)); 9149 match(Set dst (VectorMaskLastTrue mask)); 9150 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9151 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9152 ins_encode %{ 9153 int opcode = this->ideal_Opcode(); 9154 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9155 int mask_len = Matcher::vector_length(this, $mask); 9156 int mask_size = mask_len * type2aelembytes(mbt); 9157 int vlen_enc = vector_length_encoding(this, $mask); 9158 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9159 $tmp$$Register, mask_len, mask_size, vlen_enc); 9160 %} 9161 ins_pipe( pipe_slow ); 9162 %} 9163 9164 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9165 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9166 match(Set dst (VectorMaskFirstTrue mask)); 9167 match(Set dst (VectorMaskLastTrue mask)); 9168 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9169 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9170 ins_encode %{ 9171 int opcode = this->ideal_Opcode(); 9172 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9173 int mask_len = Matcher::vector_length(this, $mask); 9174 int vlen_enc = vector_length_encoding(this, $mask); 9175 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9176 $tmp$$Register, mask_len, mbt, vlen_enc); 9177 %} 9178 ins_pipe( pipe_slow ); 9179 %} 9180 9181 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9182 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9183 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9184 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9185 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9186 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9187 ins_encode %{ 9188 int opcode = this->ideal_Opcode(); 9189 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9190 int mask_len = Matcher::vector_length(this, $mask); 9191 int vlen_enc = vector_length_encoding(this, $mask); 9192 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9193 $tmp$$Register, mask_len, mbt, vlen_enc); 9194 %} 9195 ins_pipe( pipe_slow ); 9196 %} 9197 9198 // --------------------------------- Compress/Expand Operations --------------------------- 9199 9200 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9201 match(Set dst (CompressV src mask)); 9202 match(Set dst (ExpandV src mask)); 9203 format %{ "vector_compress_expand $dst, $src, $mask" %} 9204 ins_encode %{ 9205 int opcode = this->ideal_Opcode(); 9206 int vector_len = vector_length_encoding(this); 9207 BasicType bt = Matcher::vector_element_basic_type(this); 9208 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9209 %} 9210 ins_pipe( pipe_slow ); 9211 %} 9212 9213 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9214 match(Set dst (CompressM mask)); 9215 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9216 format %{ "mask_compress_evex $dst, $mask\t! 

instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
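
// Note: CompressV packs the lanes selected by the mask contiguously at the low
// end of the destination (vpcompress), and ExpandV performs the inverse
// scatter back to the masked positions (vpexpand). E.g. compressing
// [a, b, c, d] with mask 0b1010 yields [b, d, 0, 0] when zero-masking is
// selected.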

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc,
                               $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte_reg(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst);
  format %{ "vector_reverse_byte $dst, $src" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
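
// Note: on GFNI hardware a single gf2p8affineqb against the replicated 8x8 bit
// matrix 0x8040201008040201 (loaded from the constant pool above) reverses the
// bits within every byte; a byte-reversal shuffle then extends this to whole
// elements, avoiding the shift-and-mask sequence of the non-GFNI fallback.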

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
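
// Note: AVX512CD supplies vplzcntd/vplzcntq directly for int and long lanes;
// the subword variants above presumably widen to dword lanes, count there, and
// then subtract the widening distance (16 for short, 24 for byte) using the
// temporaries.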
xor masked operation" %} 9418 ins_encode %{ 9419 int vlen_enc = vector_length_encoding(this); 9420 BasicType bt = Matcher::vector_element_basic_type(this); 9421 int opc = this->ideal_Opcode(); 9422 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9423 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9424 %} 9425 ins_pipe( pipe_slow ); 9426 %} 9427 9428 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9429 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9430 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9431 ins_encode %{ 9432 int vlen_enc = vector_length_encoding(this); 9433 BasicType bt = Matcher::vector_element_basic_type(this); 9434 int opc = this->ideal_Opcode(); 9435 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9436 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9437 %} 9438 ins_pipe( pipe_slow ); 9439 %} 9440 9441 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9442 match(Set dst (OrV (Binary dst src2) mask)); 9443 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9444 ins_encode %{ 9445 int vlen_enc = vector_length_encoding(this); 9446 BasicType bt = Matcher::vector_element_basic_type(this); 9447 int opc = this->ideal_Opcode(); 9448 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9449 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9450 %} 9451 ins_pipe( pipe_slow ); 9452 %} 9453 9454 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9455 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9456 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9457 ins_encode %{ 9458 int vlen_enc = vector_length_encoding(this); 9459 BasicType bt = Matcher::vector_element_basic_type(this); 9460 int opc = this->ideal_Opcode(); 9461 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9462 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9463 %} 9464 ins_pipe( pipe_slow ); 9465 %} 9466 9467 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9468 match(Set dst (AndV (Binary dst src2) mask)); 9469 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9470 ins_encode %{ 9471 int vlen_enc = vector_length_encoding(this); 9472 BasicType bt = Matcher::vector_element_basic_type(this); 9473 int opc = this->ideal_Opcode(); 9474 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9475 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9476 %} 9477 ins_pipe( pipe_slow ); 9478 %} 9479 9480 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9481 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9482 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9483 ins_encode %{ 9484 int vlen_enc = vector_length_encoding(this); 9485 BasicType bt = Matcher::vector_element_basic_type(this); 9486 int opc = this->ideal_Opcode(); 9487 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9488 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9489 %} 9490 ins_pipe( pipe_slow ); 9491 %} 9492 9493 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9494 match(Set dst (SubVB (Binary dst src2) mask)); 9495 match(Set dst (SubVS (Binary dst src2) mask)); 9496 match(Set dst (SubVI (Binary dst src2) mask)); 9497 match(Set dst (SubVL (Binary dst src2) mask)); 9498 match(Set dst (SubVF (Binary dst src2) mask)); 9499 match(Set dst (SubVD (Binary dst src2) mask)); 9500 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9501 ins_encode %{ 9502 int vlen_enc = vector_length_encoding(this); 9503 BasicType bt = Matcher::vector_element_basic_type(this); 9504 int opc = this->ideal_Opcode(); 9505 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9506 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9507 %} 9508 ins_pipe( pipe_slow ); 9509 %} 9510 9511 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9512 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9513 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9514 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9515 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9516 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9517 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9518 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9519 ins_encode %{ 9520 int vlen_enc = vector_length_encoding(this); 9521 BasicType bt = Matcher::vector_element_basic_type(this); 9522 int opc = this->ideal_Opcode(); 9523 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9524 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9525 %} 9526 ins_pipe( pipe_slow ); 9527 %} 9528 9529 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9530 match(Set dst (MulVS (Binary dst src2) mask)); 9531 match(Set dst (MulVI (Binary dst src2) mask)); 9532 match(Set dst (MulVL (Binary dst src2) mask)); 9533 match(Set dst (MulVF (Binary dst src2) mask)); 9534 match(Set dst (MulVD (Binary dst src2) mask)); 9535 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9536 ins_encode %{ 9537 int vlen_enc = vector_length_encoding(this); 9538 BasicType bt = Matcher::vector_element_basic_type(this); 9539 int opc = this->ideal_Opcode(); 9540 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9541 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9542 %} 9543 ins_pipe( pipe_slow ); 9544 %} 9545 9546 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9547 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9548 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9549 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9550 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9551 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9552 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9553 ins_encode %{ 9554 int vlen_enc = vector_length_encoding(this); 9555 BasicType bt = Matcher::vector_element_basic_type(this); 9556 int opc = this->ideal_Opcode(); 9557 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9558 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9559 %} 9560 ins_pipe( pipe_slow ); 9561 %} 9562 9563 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9564 match(Set dst (SqrtVF dst mask)); 9565 match(Set dst (SqrtVD dst mask)); 9566 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9567 ins_encode %{ 9568 int vlen_enc = vector_length_encoding(this); 9569 BasicType bt = Matcher::vector_element_basic_type(this); 9570 int opc = this->ideal_Opcode(); 9571 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9572 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9573 %} 9574 ins_pipe( pipe_slow ); 9575 %} 9576 9577 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9578 match(Set dst (DivVF (Binary dst src2) mask)); 9579 match(Set dst (DivVD (Binary dst src2) mask)); 9580 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9581 ins_encode %{ 9582 int vlen_enc = vector_length_encoding(this); 9583 BasicType bt = Matcher::vector_element_basic_type(this); 9584 int opc = this->ideal_Opcode(); 9585 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9586 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 9591 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9592 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9593 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9594 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9595 ins_encode %{ 9596 int vlen_enc = vector_length_encoding(this); 9597 BasicType bt = Matcher::vector_element_basic_type(this); 9598 int opc = this->ideal_Opcode(); 9599 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9600 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9601 %} 9602 ins_pipe( pipe_slow ); 9603 %} 9604 9605 9606 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9607 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9608 match(Set dst (RotateRightV (Binary dst shift) mask)); 9609 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9610 ins_encode %{ 9611 int vlen_enc = vector_length_encoding(this); 9612 BasicType bt = Matcher::vector_element_basic_type(this); 9613 int opc = this->ideal_Opcode(); 9614 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9615 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9616 %} 9617 ins_pipe( pipe_slow ); 9618 %} 9619 9620 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9621 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9622 match(Set dst (RotateRightV (Binary dst src2) mask)); 9623 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9624 ins_encode %{ 9625 int vlen_enc = vector_length_encoding(this); 9626 BasicType bt = Matcher::vector_element_basic_type(this); 9627 int opc = this->ideal_Opcode(); 9628 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9629 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9630 %} 9631 ins_pipe( pipe_slow ); 9632 %} 9633 9634 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9635 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9636 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9637 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9638 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9639 ins_encode %{ 9640 int vlen_enc = vector_length_encoding(this); 9641 BasicType bt = Matcher::vector_element_basic_type(this); 9642 int opc = this->ideal_Opcode(); 9643 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9644 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9650 predicate(!n->as_ShiftV()->is_var_shift()); 9651 match(Set dst (LShiftVS (Binary dst src2) mask)); 9652 match(Set dst (LShiftVI (Binary dst src2) mask)); 9653 match(Set dst (LShiftVL (Binary dst src2) mask)); 9654 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9655 ins_encode %{ 9656 int vlen_enc = vector_length_encoding(this); 9657 BasicType bt = Matcher::vector_element_basic_type(this); 9658 int opc = this->ideal_Opcode(); 9659 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9660 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9661 %} 9662 ins_pipe( pipe_slow ); 9663 %} 9664 9665 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9666 predicate(n->as_ShiftV()->is_var_shift()); 9667 match(Set dst (LShiftVS (Binary dst src2) mask)); 9668 match(Set dst (LShiftVI (Binary dst src2) mask)); 9669 match(Set dst (LShiftVL (Binary dst src2) mask)); 9670 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9671 ins_encode %{ 9672 int vlen_enc = vector_length_encoding(this); 9673 BasicType bt = Matcher::vector_element_basic_type(this); 9674 int opc = this->ideal_Opcode(); 9675 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9676 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9677 %} 9678 ins_pipe( pipe_slow ); 9679 %} 9680 9681 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9682 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9683 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9684 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9685 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9686 ins_encode %{ 9687 int vlen_enc = vector_length_encoding(this); 9688 BasicType bt = Matcher::vector_element_basic_type(this); 9689 int opc = this->ideal_Opcode(); 9690 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9691 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9692 %} 9693 ins_pipe( pipe_slow ); 9694 %} 9695 9696 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9697 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9698 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9699 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9700 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! 
rshift masked operation" %} 9701 ins_encode %{ 9702 int vlen_enc = vector_length_encoding(this); 9703 BasicType bt = Matcher::vector_element_basic_type(this); 9704 int opc = this->ideal_Opcode(); 9705 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9706 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9707 %} 9708 ins_pipe( pipe_slow ); 9709 %} 9710 9711 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9712 predicate(!n->as_ShiftV()->is_var_shift()); 9713 match(Set dst (RShiftVS (Binary dst src2) mask)); 9714 match(Set dst (RShiftVI (Binary dst src2) mask)); 9715 match(Set dst (RShiftVL (Binary dst src2) mask)); 9716 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9717 ins_encode %{ 9718 int vlen_enc = vector_length_encoding(this); 9719 BasicType bt = Matcher::vector_element_basic_type(this); 9720 int opc = this->ideal_Opcode(); 9721 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9722 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9723 %} 9724 ins_pipe( pipe_slow ); 9725 %} 9726 9727 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9728 predicate(n->as_ShiftV()->is_var_shift()); 9729 match(Set dst (RShiftVS (Binary dst src2) mask)); 9730 match(Set dst (RShiftVI (Binary dst src2) mask)); 9731 match(Set dst (RShiftVL (Binary dst src2) mask)); 9732 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9733 ins_encode %{ 9734 int vlen_enc = vector_length_encoding(this); 9735 BasicType bt = Matcher::vector_element_basic_type(this); 9736 int opc = this->ideal_Opcode(); 9737 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9738 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9739 %} 9740 ins_pipe( pipe_slow ); 9741 %} 9742 9743 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9744 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9745 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9746 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9747 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9748 ins_encode %{ 9749 int vlen_enc = vector_length_encoding(this); 9750 BasicType bt = Matcher::vector_element_basic_type(this); 9751 int opc = this->ideal_Opcode(); 9752 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9753 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9754 %} 9755 ins_pipe( pipe_slow ); 9756 %} 9757 9758 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9759 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9760 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9761 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9762 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 9763 ins_encode %{ 9764 int vlen_enc = vector_length_encoding(this); 9765 BasicType bt = Matcher::vector_element_basic_type(this); 9766 int opc = this->ideal_Opcode(); 9767 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9768 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9769 %} 9770 ins_pipe( pipe_slow ); 9771 %} 9772 9773 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9774 predicate(!n->as_ShiftV()->is_var_shift()); 9775 match(Set dst (URShiftVS (Binary dst src2) mask)); 9776 match(Set dst (URShiftVI (Binary dst src2) mask)); 9777 match(Set dst (URShiftVL (Binary dst src2) mask)); 9778 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9779 ins_encode %{ 9780 int vlen_enc = vector_length_encoding(this); 9781 BasicType bt = Matcher::vector_element_basic_type(this); 9782 int opc = this->ideal_Opcode(); 9783 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9784 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9785 %} 9786 ins_pipe( pipe_slow ); 9787 %} 9788 9789 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9790 predicate(n->as_ShiftV()->is_var_shift()); 9791 match(Set dst (URShiftVS (Binary dst src2) mask)); 9792 match(Set dst (URShiftVI (Binary dst src2) mask)); 9793 match(Set dst (URShiftVL (Binary dst src2) mask)); 9794 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9795 ins_encode %{ 9796 int vlen_enc = vector_length_encoding(this); 9797 BasicType bt = Matcher::vector_element_basic_type(this); 9798 int opc = this->ideal_Opcode(); 9799 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9800 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9801 %} 9802 ins_pipe( pipe_slow ); 9803 %} 9804 9805 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9806 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9807 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9808 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9809 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9810 ins_encode %{ 9811 int vlen_enc = vector_length_encoding(this); 9812 BasicType bt = Matcher::vector_element_basic_type(this); 9813 int opc = this->ideal_Opcode(); 9814 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9815 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9816 %} 9817 ins_pipe( pipe_slow ); 9818 %} 9819 9820 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9821 match(Set dst (MaxV (Binary dst src2) mask)); 9822 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9823 ins_encode %{ 9824 int vlen_enc = vector_length_encoding(this); 9825 BasicType bt = Matcher::vector_element_basic_type(this); 9826 int opc = this->ideal_Opcode(); 9827 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9828 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9829 %} 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9834 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9835 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9836 ins_encode %{ 9837 int vlen_enc = vector_length_encoding(this); 9838 BasicType bt = Matcher::vector_element_basic_type(this); 9839 int opc = this->ideal_Opcode(); 9840 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9841 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9842 %} 9843 ins_pipe( pipe_slow ); 9844 %} 9845 9846 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9847 match(Set dst (MinV (Binary dst src2) mask)); 9848 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9849 ins_encode %{ 9850 int vlen_enc = vector_length_encoding(this); 9851 BasicType bt = Matcher::vector_element_basic_type(this); 9852 int opc = this->ideal_Opcode(); 9853 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9854 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9855 %} 9856 ins_pipe( pipe_slow ); 9857 %} 9858 9859 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9860 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9861 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9862 ins_encode %{ 9863 int vlen_enc = vector_length_encoding(this); 9864 BasicType bt = Matcher::vector_element_basic_type(this); 9865 int opc = this->ideal_Opcode(); 9866 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9867 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9868 %} 9869 ins_pipe( pipe_slow ); 9870 %} 9871 9872 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9873 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9874 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9875 ins_encode %{ 9876 int vlen_enc = vector_length_encoding(this); 9877 BasicType bt = Matcher::vector_element_basic_type(this); 9878 int opc = this->ideal_Opcode(); 9879 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9880 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9881 %} 9882 ins_pipe( pipe_slow ); 9883 %} 9884 9885 instruct vabs_masked(vec dst, kReg mask) %{ 9886 match(Set dst (AbsVB dst mask)); 9887 match(Set dst (AbsVS dst mask)); 9888 match(Set dst (AbsVI dst mask)); 9889 match(Set dst (AbsVL dst mask)); 9890 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9891 ins_encode %{ 9892 int vlen_enc = vector_length_encoding(this); 9893 BasicType bt = Matcher::vector_element_basic_type(this); 9894 int opc = this->ideal_Opcode(); 9895 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9896 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9897 %} 9898 ins_pipe( pipe_slow ); 9899 %} 9900 9901 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9902 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9903 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9904 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9905 ins_encode %{ 9906 assert(UseFMA, "Needs FMA instructions support."); 9907 int vlen_enc = vector_length_encoding(this); 9908 BasicType bt = Matcher::vector_element_basic_type(this); 9909 int opc = this->ideal_Opcode(); 9910 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9911 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9912 %} 9913 ins_pipe( pipe_slow ); 9914 %} 9915 9916 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9917 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9918 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9919 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9920 ins_encode %{ 9921 assert(UseFMA, "Needs FMA instructions support."); 9922 int vlen_enc = vector_length_encoding(this); 9923 BasicType bt = Matcher::vector_element_basic_type(this); 9924 int opc = this->ideal_Opcode(); 9925 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9926 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9927 %} 9928 ins_pipe( pipe_slow ); 9929 %} 9930 9931 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 9932 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 9933 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 9934 ins_encode %{ 9935 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 9936 int vlen_enc = vector_length_encoding(this, $src1); 9937 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 9938 9939 // Comparison i 9940 switch (src1_elem_bt) { 9941 case T_BYTE: { 9942 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9943 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9944 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9945 break; 9946 } 9947 case T_SHORT: { 9948 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9949 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9950 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9951 break; 9952 } 9953 case T_INT: { 9954 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9955 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9956 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9957 break; 9958 } 9959 case T_LONG: { 9960 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant); 9961 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9962 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9963 break; 9964 } 9965 case T_FLOAT: { 9966 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9967 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9968 break; 9969 } 9970 case T_DOUBLE: { 9971 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9972 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9973 break; 9974 } 9975 default: assert(false, "%s", type2name(src1_elem_bt)); break; 9976 } 9977 %} 9978 ins_pipe( 

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare under the mask, dispatching on the element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
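
// Note: booltest_pred_to_comparison_pred maps C2's BoolTest condition onto the
// EVEX imm8 comparison predicate, and the `!is_unsigned` argument is the
// signed-compare flag, so unsigned conditions select the vpcmpu* encodings;
// floating-point conditions use the separate ComparisonPredicateFP mapping.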
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
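// VPTERNLOG's $func immediate is a 3-input truth table: bit k of the byte
// is the output for input combination k = (a << 2) | (b << 1) | c, where
// a, b, c are the corresponding bits of dst, src2 and src3. For example,
// func == 0x96 computes a ^ b ^ c, and func == 0xE8 the majority function.
// Bitwise scalar model (illustrative only, not part of the encoding):
//
//   int ternlog_bit(int a, int b, int c, uint8_t func) {  // a, b, c in {0, 1}
//     return (func >> ((a << 2) | (b << 1) | c)) & 1;
//   }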
instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
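// The vfpclass immediate 0x18 used by the two IsInfinite rules above selects
// the +infinity (0x08) and -infinity (0x10) FP classes, so the generated
// check is equivalent to the scalar model below (illustrative only):
//
//   bool is_infinite(double d) { return d == +INFINITY || d == -INFINITY; }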