//
// Copyright (c) 2011, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
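//
// As an illustration of the reg_def format described above (this restates
// the first entry below rather than defining anything new):
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
// declares slot XMM0 as Save-On-Call for both the allocator and the C
// calling convention, spilled with LoadF/StoreF (Op_RegF), carrying
// hardware encoding 0, and backed by the VM-level register xmm0.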
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM15 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
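// (Sketch of the mechanics, for orientation: ADLC emits source_hpp %{ }%
// blocks into the generated ad_x86.hpp header and source %{ }% blocks into
// the generated ad_x86.cpp, which is why declarations that must be visible
// outside the ad-scope belong in source_hpp blocks.)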
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // Number of relocations needed by a call trampoline stub. 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(C2_MacroAssembler *masm); 1191 static int emit_deopt_handler(C2_MacroAssembler* masm); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // The exception handler starts out as a jump and can be patched to 1196 // a call by deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5 byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // The deopt handler starts out as a jump and can be patched to 1211 // a call by deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch (bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1250 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1251 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1252 } 1253 1254 class Node::PD { 1255 public: 1256 enum NodeFlags { 1257 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1258 Flag_sets_carry_flag = Node::_last_flag << 2, 1259 Flag_sets_parity_flag = Node::_last_flag << 3, 1260 Flag_sets_zero_flag = Node::_last_flag << 4, 1261 Flag_sets_overflow_flag = Node::_last_flag << 5, 1262 Flag_sets_sign_flag = Node::_last_flag << 6, 1263 Flag_clears_carry_flag = Node::_last_flag << 7, 1264 Flag_clears_parity_flag = Node::_last_flag << 8,
1265 Flag_clears_zero_flag = Node::_last_flag << 9, 1266 Flag_clears_overflow_flag = Node::_last_flag << 10, 1267 Flag_clears_sign_flag = Node::_last_flag << 11, 1268 _last_flag = Flag_clears_sign_flag 1269 }; 1270 }; 1271 1272 %} // end source_hpp 1273 1274 source %{ 1275 1276 #include "opto/addnode.hpp" 1277 #include "c2_intelJccErratum_x86.hpp" 1278 1279 void PhaseOutput::pd_perform_mach_node_analysis() { 1280 if (VM_Version::has_intel_jcc_erratum()) { 1281 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1282 _buf_sizes._code += extra_padding; 1283 } 1284 } 1285 1286 int MachNode::pd_alignment_required() const { 1287 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1288 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1289 return IntelJccErratum::largest_jcc_size() + 1; 1290 } else { 1291 return 1; 1292 } 1293 } 1294 1295 int MachNode::compute_padding(int current_offset) const { 1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1297 Compile* C = Compile::current(); 1298 PhaseOutput* output = C->output(); 1299 Block* block = output->block(); 1300 int index = output->index(); 1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1302 } else { 1303 return 0; 1304 } 1305 } 1306 1307 // Emit exception handler code. 1308 // The handler is simply a jump to the runtime exception blob's entry point. 1309 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm) { 1310 1311 // Note that the code buffer's insts_mark is always relative to insts. 1312 // That's why we must use the macroassembler to generate a handler. 1313 address base = __ start_a_stub(size_exception_handler()); 1314 if (base == nullptr) { 1315 ciEnv::current()->record_failure("CodeCache is full"); 1316 return 0; // CodeBuffer::expand failed 1317 } 1318 int offset = __ offset(); 1319 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1320 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1321 __ end_a_stub(); 1322 return offset; 1323 } 1324 1325 // Emit deopt handler code. 1326 int HandlerImpl::emit_deopt_handler(C2_MacroAssembler* masm) { 1327 1328 // Note that the code buffer's insts_mark is always relative to insts. 1329 // That's why we must use the macroassembler to generate a handler. 1330 address base = __ start_a_stub(size_deopt_handler()); 1331 if (base == nullptr) { 1332 ciEnv::current()->record_failure("CodeCache is full"); 1333 return 0; // CodeBuffer::expand failed 1334 } 1335 int offset = __ offset(); 1336 1337 #ifdef _LP64 1338 address the_pc = (address) __ pc(); 1339 Label next; 1340 // Push the value of "the_pc" on the stack without destroying any registers, 1341 // as they all may be live.
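// (How the trick below works, roughly: the call pushes its return address,
// and since "next" is bound at the instruction immediately after the call,
// the pushed value is the address of "next"; the subptr below then rewinds
// that stack slot by the distance between "the_pc" and "next".)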
1342 1343 // push address of "next" 1344 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1345 __ bind(next); 1346 // adjust it so it matches "the_pc" 1347 __ subptr(Address(rsp, 0), __ offset() - offset); 1348 #else 1349 InternalAddress here(__ pc()); 1350 __ pushptr(here.addr(), noreg); 1351 #endif 1352 1353 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1354 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1355 __ end_a_stub(); 1356 return offset; 1357 } 1358 1359 static Assembler::Width widthForType(BasicType bt) { 1360 if (bt == T_BYTE) { 1361 return Assembler::B; 1362 } else if (bt == T_SHORT) { 1363 return Assembler::W; 1364 } else if (bt == T_INT) { 1365 return Assembler::D; 1366 } else { 1367 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1368 return Assembler::Q; 1369 } 1370 } 1371 1372 //============================================================================= 1373 1374 // Float masks come from different places depending on platform. 1375 #ifdef _LP64 1376 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1377 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1378 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1379 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1380 #else 1381 static address float_signmask() { return (address)float_signmask_pool; } 1382 static address float_signflip() { return (address)float_signflip_pool; } 1383 static address double_signmask() { return (address)double_signmask_pool; } 1384 static address double_signflip() { return (address)double_signflip_pool; } 1385 #endif 1386 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1387 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1388 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1389 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1390 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1391 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1392 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1393 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1394 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1395 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1396 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1397 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1398 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1399 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1400 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1401 1402 //============================================================================= 1403 bool Matcher::match_rule_supported(int opcode) { 1404 if (!has_match_rule(opcode)) { 1405 return false; // no match rule present 1406 } 1407 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1408 switch (opcode) { 1409 case Op_AbsVL: 1410 case Op_StoreVectorScatter: 1411 if (UseAVX < 3) { 1412 return false; 1413 } 1414 break; 1415 case Op_PopCountI: 1416 case Op_PopCountL: 1417 if (!UsePopCountInstruction) { 1418 return false; 1419 } 1420 break; 1421 case Op_PopCountVI: 1422 if (UseAVX < 2) { 1423 return false; 1424 } 1425 break; 1426 case Op_CompressV: 1427 case Op_ExpandV: 1428 case Op_PopCountVL: 1429 if (UseAVX < 2) { 1430 return false; 1431 } 1432 break; 1433 case Op_MulVI: 1434 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1435 return false; 1436 } 1437 break; 1438 case Op_MulVL: 1439 if (UseSSE < 4) { // only with SSE4_1 or AVX 1440 return false; 1441 } 1442 break; 1443 case Op_MulReductionVL: 1444 if (VM_Version::supports_avx512dq() == false) { 1445 return false; 1446 } 1447 break; 1448 case Op_AddReductionVL: 1449 if (UseSSE < 2) { // requires at least SSE2 1450 return false; 1451 } 1452 break; 1453 case Op_AbsVB: 1454 case Op_AbsVS: 1455 case Op_AbsVI: 1456 case Op_AddReductionVI: 1457 case Op_AndReductionV: 1458 case Op_OrReductionV: 1459 case Op_XorReductionV: 1460 if (UseSSE < 3) { // requires at least SSSE3 1461 return false; 1462 } 1463 break; 1464 case Op_VectorLoadShuffle: 1465 case Op_VectorRearrange: 1466 case Op_MulReductionVI: 1467 if (UseSSE < 4) { // requires at least SSE4 1468 return false; 1469 } 1470 break; 1471 case Op_IsInfiniteF: 1472 case Op_IsInfiniteD: 1473 if (!VM_Version::supports_avx512dq()) { 1474 return false; 1475 } 1476 break; 1477 case Op_SqrtVD: 1478 case Op_SqrtVF: 1479 case Op_VectorMaskCmp: 1480 case Op_VectorCastB2X: 1481 case Op_VectorCastS2X: 1482 case Op_VectorCastI2X: 1483 case Op_VectorCastL2X: 1484 case Op_VectorCastF2X: 1485 case Op_VectorCastD2X: 1486 case Op_VectorUCastB2X: 1487 case Op_VectorUCastS2X: 1488 case Op_VectorUCastI2X: 1489 case Op_VectorMaskCast: 1490 if (UseAVX < 1) { // enabled for AVX only 1491 return false; 1492 } 1493 break; 1494 case Op_PopulateIndex: 1495 if (!is_LP64 || (UseAVX < 2)) { 1496 return false; 1497 } 1498 break; 1499 case Op_RoundVF: 1500 if (UseAVX < 2) { // enabled for AVX2 only 1501 return false; 1502 } 1503 break; 1504 case Op_RoundVD: 1505 if (UseAVX < 3) { 1506 return false; // enabled for AVX3 only 1507 } 1508 break; 1509 case Op_CompareAndSwapL: 1510 #ifdef _LP64 1511 case Op_CompareAndSwapP: 1512 #endif 1513 break; 1514 case Op_StrIndexOf: 1515 if (!UseSSE42Intrinsics) { 1516 return false; 1517 } 1518 break; 1519 case Op_StrIndexOfChar: 1520 if (!UseSSE42Intrinsics) { 1521 return false; 1522 } 1523 break; 1524 case Op_OnSpinWait: 1525 if (VM_Version::supports_on_spin_wait() == false) { 1526 return false; 1527 } 1528 break; 1529 case Op_MulVB: 1530 case Op_LShiftVB: 1531 case Op_RShiftVB: 1532 case Op_URShiftVB: 1533 case Op_VectorInsert: 1534 case Op_VectorLoadMask: 1535 case Op_VectorStoreMask: 1536 case Op_VectorBlend: 1537 if (UseSSE < 4) { 1538 return false; 1539 } 1540 break; 1541 #ifdef _LP64 1542 case Op_MaxD: 1543 case Op_MaxF: 1544 case Op_MinD: 1545 case Op_MinF: 1546 if (UseAVX < 1) { // enabled for AVX only 1547 return false; 1548 } 1549 break; 1550 #endif 1551 case Op_CacheWB: 1552 case Op_CacheWBPreSync: 1553 case Op_CacheWBPostSync: 1554 if (!VM_Version::supports_data_cache_line_flush()) { 1555 return false; 1556 } 1557 break; 1558 case Op_ExtractB: 1559 case Op_ExtractL: 1560 case Op_ExtractI: 1561 case Op_RoundDoubleMode: 1562 if (UseSSE < 4) { 1563 return false; 1564 } 1565 break; 1566 case Op_RoundDoubleModeV: 1567 
if (VM_Version::supports_avx() == false) { 1568 return false; // 128bit vroundpd is not available 1569 } 1570 break; 1571 case Op_LoadVectorGather: 1572 case Op_LoadVectorGatherMasked: 1573 if (UseAVX < 2) { 1574 return false; 1575 } 1576 break; 1577 case Op_FmaF: 1578 case Op_FmaD: 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_SqrtF: 1664 if (UseSSE < 1) { 1665 return false; 1666 } 1667 break; 1668 case Op_SqrtD: 1669 #ifdef _LP64 1670 if (UseSSE < 2) { 1671 return false; 1672 } 1673 #else 1674 // x86_32.ad has a special match rule for SqrtD. 1675 // Together with common x86 rules, this handles all UseSSE cases. 1676 #endif 1677 break; 1678 case Op_ConvF2HF: 1679 case Op_ConvHF2F: 1680 if (!VM_Version::supports_float16()) { 1681 return false; 1682 } 1683 break; 1684 case Op_VectorCastF2HF: 1685 case Op_VectorCastHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1687 return false; 1688 } 1689 break; 1690 } 1691 return true; // Match rules are supported by default. 1692 } 1693 1694 //------------------------------------------------------------------------ 1695 1696 static inline bool is_pop_count_instr_target(BasicType bt) { 1697 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1698 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1699 } 1700 1701 bool Matcher::match_rule_supported_auto_vectorization(int opcode, int vlen, BasicType bt) { 1702 return match_rule_supported_vector(opcode, vlen, bt); 1703 } 1704 1705 // Identify extra cases that we might want to provide match rules for vector nodes and 1706 // other intrinsics guarded with vector length (vlen) and element type (bt). 
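// (Illustrative sizing example: vlen = 8 elements of T_INT gives
// size_in_bits = 8 * 4 * 8 = 256 in the computation below, so such a node
// is only accepted where 256-bit integer vectors exist, i.e. AVX2 and up.)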
1707 bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1708 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1709 if (!match_rule_supported(opcode)) { 1710 return false; 1711 } 1712 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1713 // * SSE2 supports 128bit vectors for all types; 1714 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1715 // * AVX2 supports 256bit vectors for all types; 1716 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1717 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1718 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1719 // And MaxVectorSize is taken into account as well. 1720 if (!vector_size_supported(bt, vlen)) { 1721 return false; 1722 } 1723 // Special cases which require vector length follow: 1724 // * implementation limitations 1725 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1726 // * 128bit vroundpd instruction is present only in AVX1 1727 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1728 switch (opcode) { 1729 case Op_AbsVF: 1730 case Op_NegVF: 1731 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1732 return false; // 512bit vandps and vxorps are not available 1733 } 1734 break; 1735 case Op_AbsVD: 1736 case Op_NegVD: 1737 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1739 } 1740 break; 1741 case Op_RotateRightV: 1742 case Op_RotateLeftV: 1743 if (bt != T_INT && bt != T_LONG) { 1744 return false; 1745 } // fallthrough 1746 case Op_MacroLogicV: 1747 if (!VM_Version::supports_evex() || 1748 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1749 return false; 1750 } 1751 break; 1752 case Op_ClearArray: 1753 case Op_VectorMaskGen: 1754 case Op_VectorCmpMasked: 1755 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1756 return false; 1757 } 1758 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1759 return false; 1760 } 1761 break; 1762 case Op_LoadVectorMasked: 1763 case Op_StoreVectorMasked: 1764 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1765 return false; 1766 } 1767 break; 1768 case Op_MaxV: 1769 case Op_MinV: 1770 if (UseSSE < 4 && is_integral_type(bt)) { 1771 return false; 1772 } 1773 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1774 // Float/Double intrinsics are enabled for AVX family currently. 
if (UseAVX == 0) { 1776 return false; 1777 } 1778 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1779 return false; 1780 } 1781 } 1782 break; 1783 case Op_CallLeafVector: 1784 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1785 return false; 1786 } 1787 break; 1788 case Op_AddReductionVI: 1789 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1790 return false; 1791 } 1792 // fallthrough 1793 case Op_AndReductionV: 1794 case Op_OrReductionV: 1795 case Op_XorReductionV: 1796 if (is_subword_type(bt) && (UseSSE < 4)) { 1797 return false; 1798 } 1799 #ifndef _LP64 1800 if (bt == T_BYTE || bt == T_LONG) { 1801 return false; 1802 } 1803 #endif 1804 break; 1805 #ifndef _LP64 1806 case Op_VectorInsert: 1807 if (bt == T_LONG || bt == T_DOUBLE) { 1808 return false; 1809 } 1810 break; 1811 #endif 1812 case Op_MinReductionV: 1813 case Op_MaxReductionV: 1814 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1815 return false; 1816 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1817 return false; 1818 } 1819 // Float/Double intrinsics enabled for AVX family. 1820 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1821 return false; 1822 } 1823 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1824 return false; 1825 } 1826 #ifndef _LP64 1827 if (bt == T_BYTE || bt == T_LONG) { 1828 return false; 1829 } 1830 #endif 1831 break; 1832 case Op_VectorTest: 1833 if (UseSSE < 4) { 1834 return false; // Implementation limitation 1835 } else if (size_in_bits < 32) { 1836 return false; // Implementation limitation 1837 } 1838 break; 1839 case Op_VectorLoadShuffle: 1840 case Op_VectorRearrange: 1841 if (vlen == 2) { 1842 return false; // Implementation limitation due to how shuffle is loaded 1843 } else if (size_in_bits == 256 && UseAVX < 2) { 1844 return false; // Implementation limitation 1845 } 1846 break; 1847 case Op_VectorLoadMask: 1848 case Op_VectorMaskCast: 1849 if (size_in_bits == 256 && UseAVX < 2) { 1850 return false; // Implementation limitation 1851 } 1852 // fallthrough 1853 case Op_VectorStoreMask: 1854 if (vlen == 2) { 1855 return false; // Implementation limitation 1856 } 1857 break; 1858 case Op_PopulateIndex: 1859 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1860 return false; 1861 } 1862 break; 1863 case Op_VectorCastB2X: 1864 case Op_VectorCastS2X: 1865 case Op_VectorCastI2X: 1866 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1867 return false; 1868 } 1869 break; 1870 case Op_VectorCastL2X: 1871 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1872 return false; 1873 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1874 return false; 1875 } 1876 break; 1877 case Op_VectorCastF2X: { 1878 // As per JLS section 5.1.3, narrowing conversions to sub-word types 1879 // happen after an intermediate conversion to integer, and the special handling 1880 // code needs the AVX2 vpcmpeqd instruction for 256 bit vectors.
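// (For instance: narrowing an 8-lane float vector gives
// src_size_in_bits = 4 * 8 * 8 = 256 below, so the cast to an integral
// type is rejected unless UseAVX >= 2.)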
int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1882 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1883 return false; 1884 } 1885 } 1886 // fallthrough 1887 case Op_VectorCastD2X: 1888 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1889 return false; 1890 } 1891 break; 1892 case Op_VectorCastF2HF: 1893 case Op_VectorCastHF2F: 1894 if (!VM_Version::supports_f16c() && 1895 ((!VM_Version::supports_evex() || 1896 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1897 return false; 1898 } 1899 break; 1900 case Op_RoundVD: 1901 if (!VM_Version::supports_avx512dq()) { 1902 return false; 1903 } 1904 break; 1905 case Op_MulReductionVI: 1906 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1907 return false; 1908 } 1909 break; 1910 case Op_LoadVectorGatherMasked: 1911 if (!is_subword_type(bt) && size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1912 return false; 1913 } 1914 if (is_subword_type(bt) && 1915 (!is_LP64 || 1916 (size_in_bits > 256 && !VM_Version::supports_avx512bw()) || 1917 (size_in_bits < 64) || 1918 (bt == T_SHORT && !VM_Version::supports_bmi2()))) { 1919 return false; 1920 } 1921 break; 1922 case Op_StoreVectorScatterMasked: 1923 case Op_StoreVectorScatter: 1924 if (is_subword_type(bt)) { 1925 return false; 1926 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1927 return false; 1928 } 1929 // fallthrough 1930 case Op_LoadVectorGather: 1931 if (!is_subword_type(bt) && size_in_bits == 64) { 1932 return false; 1933 } 1934 if (is_subword_type(bt) && size_in_bits < 64) { 1935 return false; 1936 } 1937 break; 1938 case Op_MaskAll: 1939 if (!VM_Version::supports_evex()) { 1940 return false; 1941 } 1942 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1943 return false; 1944 } 1945 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1946 return false; 1947 } 1948 break; 1949 case Op_VectorMaskCmp: 1950 if (vlen < 2 || size_in_bits < 32) { 1951 return false; 1952 } 1953 break; 1954 case Op_CompressM: 1955 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1956 return false; 1957 } 1958 break; 1959 case Op_CompressV: 1960 case Op_ExpandV: 1961 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1962 return false; 1963 } 1964 if (!is_LP64 && !VM_Version::supports_avx512vl() && size_in_bits < 512) { 1965 return false; 1966 } 1967 if (size_in_bits < 128) { 1968 return false; 1969 } break; 1970 case Op_VectorLongToMask: 1971 if (UseAVX < 1 || !is_LP64) { 1972 return false; 1973 } 1974 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1975 return false; 1976 } 1977 break; 1978 case Op_SignumVD: 1979 case Op_SignumVF: 1980 if (UseAVX < 1) { 1981 return false; 1982 } 1983 break; 1984 case Op_PopCountVI: 1985 case Op_PopCountVL: { 1986 if (!is_pop_count_instr_target(bt) && 1987 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1988 return false; 1989 } 1990 } 1991 break; 1992 case Op_ReverseV: 1993 case Op_ReverseBytesV: 1994 if (UseAVX < 2) { 1995 return false; 1996 } 1997 break; 1998 case Op_CountTrailingZerosV: 1999 case Op_CountLeadingZerosV: 2000 if (UseAVX < 2) { 2001 return false; 2002 } 2003 break; 2004 } 2005 return true; // By default, match rules are supported. 2006 } 2007 2008 bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2009 // The ADLC based match_rule_supported routine checks for the existence of a pattern based 2010 // on the IR opcode.
Most of the unary/binary/ternary masked operations share the IR nodes 2011 // of their non-masked counterpart with the mask edge being the differentiator. 2012 // This routine does a strict check on the existence of masked operation patterns 2013 // by returning a default false value for all the other opcodes apart from the 2014 // ones whose masked instruction patterns are defined in this file. 2015 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2016 return false; 2017 } 2018 2019 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2020 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2021 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2022 return false; 2023 } 2024 switch (opcode) { 2025 // Unary masked operations 2026 case Op_AbsVB: 2027 case Op_AbsVS: 2028 if (!VM_Version::supports_avx512bw()) { 2029 return false; // Implementation limitation 2030 } 2031 // fallthrough 2031 case Op_AbsVI: 2032 case Op_AbsVL: 2033 return true; 2034 2035 // Ternary masked operations 2036 case Op_FmaVF: 2037 case Op_FmaVD: 2038 return true; 2039 2040 case Op_MacroLogicV: 2041 if (bt != T_INT && bt != T_LONG) { 2042 return false; 2043 } 2044 return true; 2045 2046 // Binary masked operations 2047 case Op_AddVB: 2048 case Op_AddVS: 2049 case Op_SubVB: 2050 case Op_SubVS: 2051 case Op_MulVS: 2052 case Op_LShiftVS: 2053 case Op_RShiftVS: 2054 case Op_URShiftVS: 2055 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2056 if (!VM_Version::supports_avx512bw()) { 2057 return false; // Implementation limitation 2058 } 2059 return true; 2060 2061 case Op_MulVL: 2062 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2063 if (!VM_Version::supports_avx512dq()) { 2064 return false; // Implementation limitation 2065 } 2066 return true; 2067 2068 case Op_AndV: 2069 case Op_OrV: 2070 case Op_XorV: 2071 case Op_RotateRightV: 2072 case Op_RotateLeftV: 2073 if (bt != T_INT && bt != T_LONG) { 2074 return false; // Implementation limitation 2075 } 2076 return true; 2077 2078 case Op_VectorLoadMask: 2079 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2080 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2081 return false; 2082 } 2083 return true; 2084 2085 case Op_AddVI: 2086 case Op_AddVL: 2087 case Op_AddVF: 2088 case Op_AddVD: 2089 case Op_SubVI: 2090 case Op_SubVL: 2091 case Op_SubVF: 2092 case Op_SubVD: 2093 case Op_MulVI: 2094 case Op_MulVF: 2095 case Op_MulVD: 2096 case Op_DivVF: 2097 case Op_DivVD: 2098 case Op_SqrtVF: 2099 case Op_SqrtVD: 2100 case Op_LShiftVI: 2101 case Op_LShiftVL: 2102 case Op_RShiftVI: 2103 case Op_RShiftVL: 2104 case Op_URShiftVI: 2105 case Op_URShiftVL: 2106 case Op_LoadVectorMasked: 2107 case Op_StoreVectorMasked: 2108 case Op_LoadVectorGatherMasked: 2109 case Op_StoreVectorScatterMasked: 2110 return true; 2111 2112 case Op_MaxV: 2113 case Op_MinV: 2114 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2115 return false; // Implementation limitation 2116 } 2117 if (is_floating_point_type(bt)) { 2118 return false; // Implementation limitation 2119 } 2120 return true; 2121 2122 case Op_VectorMaskCmp: 2123 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2124 return false; // Implementation limitation 2125 } 2126 return true; 2127 2128 case Op_VectorRearrange: 2129 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2130 return false; // Implementation limitation 2131 } 2132 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2133 return false; // Implementation limitation 2134 }
else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2135 return false; // Implementation limitation 2136 } 2137 return true; 2138 2139 // Binary Logical operations 2140 case Op_AndVMask: 2141 case Op_OrVMask: 2142 case Op_XorVMask: 2143 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2144 return false; // Implementation limitation 2145 } 2146 return true; 2147 2148 case Op_PopCountVI: 2149 case Op_PopCountVL: 2150 if (!is_pop_count_instr_target(bt)) { 2151 return false; 2152 } 2153 return true; 2154 2155 case Op_MaskAll: 2156 return true; 2157 2158 case Op_CountLeadingZerosV: 2159 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2160 return true; 2161 } 2162 default: 2163 return false; 2164 } 2165 } 2166 2167 bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2168 return false; 2169 } 2170 2171 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2172 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2173 bool legacy = (generic_opnd->opcode() == LEGVEC); 2174 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2175 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2176 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2177 return new legVecZOper(); 2178 } 2179 if (legacy) { 2180 switch (ideal_reg) { 2181 case Op_VecS: return new legVecSOper(); 2182 case Op_VecD: return new legVecDOper(); 2183 case Op_VecX: return new legVecXOper(); 2184 case Op_VecY: return new legVecYOper(); 2185 case Op_VecZ: return new legVecZOper(); 2186 } 2187 } else { 2188 switch (ideal_reg) { 2189 case Op_VecS: return new vecSOper(); 2190 case Op_VecD: return new vecDOper(); 2191 case Op_VecX: return new vecXOper(); 2192 case Op_VecY: return new vecYOper(); 2193 case Op_VecZ: return new vecZOper(); 2194 } 2195 } 2196 ShouldNotReachHere(); 2197 return nullptr; 2198 } 2199 2200 bool Matcher::is_reg2reg_move(MachNode* m) { 2201 switch (m->rule()) { 2202 case MoveVec2Leg_rule: 2203 case MoveLeg2Vec_rule: 2204 case MoveF2VL_rule: 2205 case MoveF2LEG_rule: 2206 case MoveVL2F_rule: 2207 case MoveLEG2F_rule: 2208 case MoveD2VL_rule: 2209 case MoveD2LEG_rule: 2210 case MoveVL2D_rule: 2211 case MoveLEG2D_rule: 2212 return true; 2213 default: 2214 return false; 2215 } 2216 } 2217 2218 bool Matcher::is_generic_vector(MachOper* opnd) { 2219 switch (opnd->opcode()) { 2220 case VEC: 2221 case LEGVEC: 2222 return true; 2223 default: 2224 return false; 2225 } 2226 } 2227 2228 //------------------------------------------------------------------------ 2229 2230 const RegMask* Matcher::predicate_reg_mask(void) { 2231 return &_VECTMASK_REG_mask; 2232 } 2233 2234 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2235 return new TypeVectMask(elemTy, length); 2236 } 2237 2238 // Max vector size in bytes. 0 if not supported. 2239 int Matcher::vector_width_in_bytes(BasicType bt) { 2240 assert(is_java_primitive(bt), "only primitive type vectors"); 2241 if (UseSSE < 2) return 0; 2242 // SSE2 supports 128bit vectors for all types. 2243 // AVX2 supports 256bit vectors for all types. 2244 // AVX2/EVEX supports 512bit vectors for all types. 2245 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2246 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2247 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2248 size = (UseAVX > 2) ? 
64 : 32; 2249 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2250 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2251 // Use flag to limit vector size. 2252 size = MIN2(size,(int)MaxVectorSize); 2253 // Minimum 2 values in vector (or 4 for bytes). 2254 switch (bt) { 2255 case T_DOUBLE: 2256 case T_LONG: 2257 if (size < 16) return 0; 2258 break; 2259 case T_FLOAT: 2260 case T_INT: 2261 if (size < 8) return 0; 2262 break; 2263 case T_BOOLEAN: 2264 if (size < 4) return 0; 2265 break; 2266 case T_CHAR: 2267 if (size < 4) return 0; 2268 break; 2269 case T_BYTE: 2270 if (size < 4) return 0; 2271 break; 2272 case T_SHORT: 2273 if (size < 4) return 0; 2274 break; 2275 default: 2276 ShouldNotReachHere(); 2277 } 2278 return size; 2279 } 2280 2281 // Limits on vector size (number of elements) loaded into vector. 2282 int Matcher::max_vector_size(const BasicType bt) { 2283 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2284 } 2285 int Matcher::min_vector_size(const BasicType bt) { 2286 int max_size = max_vector_size(bt); 2287 // Min size which can be loaded into vector is 4 bytes. 2288 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2289 // Support for calling svml double64 vectors 2290 if (bt == T_DOUBLE) { 2291 size = 1; 2292 } 2293 return MIN2(size,max_size); 2294 } 2295 2296 int Matcher::max_vector_size_auto_vectorization(const BasicType bt) { 2297 // Limit the max vector size for auto vectorization to 256 bits (32 bytes) 2298 // by default on Cascade Lake 2299 if (VM_Version::is_default_intel_cascade_lake()) { 2300 return MIN2(Matcher::max_vector_size(bt), 32 / type2aelembytes(bt)); 2301 } 2302 return Matcher::max_vector_size(bt); 2303 } 2304 2305 int Matcher::scalable_vector_reg_size(const BasicType bt) { 2306 return -1; 2307 } 2308 2309 // Vector ideal reg corresponding to specified size in bytes 2310 uint Matcher::vector_ideal_reg(int size) { 2311 assert(MaxVectorSize >= size, ""); 2312 switch(size) { 2313 case 4: return Op_VecS; 2314 case 8: return Op_VecD; 2315 case 16: return Op_VecX; 2316 case 32: return Op_VecY; 2317 case 64: return Op_VecZ; 2318 } 2319 ShouldNotReachHere(); 2320 return 0; 2321 } 2322 2323 // Check for shift by small constant as well 2324 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2325 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2326 shift->in(2)->get_int() <= 3 && 2327 // Are there other uses besides address expressions? 2328 !matcher->is_visited(shift)) { 2329 address_visited.set(shift->_idx); // Flag as address_visited 2330 mstack.push(shift->in(2), Matcher::Visit); 2331 Node *conv = shift->in(1); 2332 #ifdef _LP64 2333 // Allow Matcher to match the rule which bypass 2334 // ConvI2L operation for an array index on LP64 2335 // if the index value is positive. 2336 if (conv->Opcode() == Op_ConvI2L && 2337 conv->as_Type()->type()->is_long()->_lo >= 0 && 2338 // Are there other uses besides address expressions? 2339 !matcher->is_visited(conv)) { 2340 address_visited.set(conv->_idx); // Flag as address_visited 2341 mstack.push(conv->in(1), Matcher::Pre_Visit); 2342 } else 2343 #endif 2344 mstack.push(conv, Matcher::Pre_Visit); 2345 return true; 2346 } 2347 return false; 2348 } 2349 2350 // This function identifies sub-graphs in which a 'load' node is 2351 // input to two different nodes, and such that it can be matched 2352 // with BMI instructions like blsi, blsr, etc. 2353 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 
2354 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2355 // refers to the same node. 2356 // 2357 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2358 // This is a temporary solution until we make DAGs expressible in ADL. 2359 template<typename ConType> 2360 class FusedPatternMatcher { 2361 Node* _op1_node; 2362 Node* _mop_node; 2363 int _con_op; 2364 2365 static int match_next(Node* n, int next_op, int next_op_idx) { 2366 if (n->in(1) == nullptr || n->in(2) == nullptr) { 2367 return -1; 2368 } 2369 2370 if (next_op_idx == -1) { // n is commutative, try rotations 2371 if (n->in(1)->Opcode() == next_op) { 2372 return 1; 2373 } else if (n->in(2)->Opcode() == next_op) { 2374 return 2; 2375 } 2376 } else { 2377 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2378 if (n->in(next_op_idx)->Opcode() == next_op) { 2379 return next_op_idx; 2380 } 2381 } 2382 return -1; 2383 } 2384 2385 public: 2386 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2387 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2388 2389 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2390 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2391 typename ConType::NativeType con_value) { 2392 if (_op1_node->Opcode() != op1) { 2393 return false; 2394 } 2395 if (_mop_node->outcnt() > 2) { 2396 return false; 2397 } 2398 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2399 if (op1_op2_idx == -1) { 2400 return false; 2401 } 2402 // Memory operation must be the other edge 2403 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2404 2405 // Check that the mop node is really what we want 2406 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2407 Node* op2_node = _op1_node->in(op1_op2_idx); 2408 if (op2_node->outcnt() > 1) { 2409 return false; 2410 } 2411 assert(op2_node->Opcode() == op2, "Should be"); 2412 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2413 if (op2_con_idx == -1) { 2414 return false; 2415 } 2416 // Memory operation must be the other edge 2417 int op2_mop_idx = (op2_con_idx & 1) + 1; 2418 // Check that the memory operation is the same node 2419 if (op2_node->in(op2_mop_idx) == _mop_node) { 2420 // Now check the constant 2421 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2422 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2423 return true; 2424 } 2425 } 2426 } 2427 return false; 2428 } 2429 }; 2430 2431 static bool is_bmi_pattern(Node* n, Node* m) { 2432 assert(UseBMI1Instructions, "sanity"); 2433 if (n != nullptr && m != nullptr) { 2434 if (m->Opcode() == Op_LoadI) { 2435 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2436 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2437 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2438 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2439 } else if (m->Opcode() == Op_LoadL) { 2440 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2441 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2442 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2443 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2444 } 2445 } 2446 return false; 2447 } 2448 2449 // Should the matcher clone input 'm' of node 'n'? 2450 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2451 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
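// (Rationale sketch: the load feeds both nodes of the fused pattern, so
// unless it is cloned it would be matched once into its own register and
// the blsi/blsr-style instruction could not subsume the memory operand.)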
2452 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2453 mstack.push(m, Visit); 2454 return true; 2455 } 2456 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2457 mstack.push(m, Visit); // m = ShiftCntV 2458 return true; 2459 } 2460 return false; 2461 } 2462 2463 // Should the Matcher clone shifts on addressing modes, expecting them 2464 // to be subsumed into complex addressing expressions or compute them 2465 // into registers? 2466 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2467 Node *off = m->in(AddPNode::Offset); 2468 if (off->is_Con()) { 2469 address_visited.test_set(m->_idx); // Flag as address_visited 2470 Node *adr = m->in(AddPNode::Address); 2471 2472 // Intel can handle 2 adds in addressing mode 2473 // AtomicAdd is not an addressing expression. 2474 // Cheap to find it by looking for screwy base. 2475 if (adr->is_AddP() && 2476 !adr->in(AddPNode::Base)->is_top() && 2477 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2478 // Are there other uses besides address expressions? 2479 !is_visited(adr)) { 2480 address_visited.set(adr->_idx); // Flag as address_visited 2481 Node *shift = adr->in(AddPNode::Offset); 2482 if (!clone_shift(shift, this, mstack, address_visited)) { 2483 mstack.push(shift, Pre_Visit); 2484 } 2485 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2486 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2487 } else { 2488 mstack.push(adr, Pre_Visit); 2489 } 2490 2491 // Clone X+offset as it also folds into most addressing expressions 2492 mstack.push(off, Visit); 2493 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2494 return true; 2495 } else if (clone_shift(off, this, mstack, address_visited)) { 2496 address_visited.test_set(m->_idx); // Flag as address_visited 2497 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2498 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2499 return true; 2500 } 2501 return false; 2502 } 2503 2504 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2505 switch (bt) { 2506 case BoolTest::eq: 2507 return Assembler::eq; 2508 case BoolTest::ne: 2509 return Assembler::neq; 2510 case BoolTest::le: 2511 case BoolTest::ule: 2512 return Assembler::le; 2513 case BoolTest::ge: 2514 case BoolTest::uge: 2515 return Assembler::nlt; 2516 case BoolTest::lt: 2517 case BoolTest::ult: 2518 return Assembler::lt; 2519 case BoolTest::gt: 2520 case BoolTest::ugt: 2521 return Assembler::nle; 2522 default : ShouldNotReachHere(); return Assembler::_false; 2523 } 2524 } 2525 2526 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2527 switch (bt) { 2528 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2529 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2530 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2531 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2532 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2533 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2534 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2535 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2536 } 2537 } 2538 2539 // Helper methods for MachSpillCopyNode::implementation(). 
2540 static void vec_mov_helper(C2_MacroAssembler *masm, int src_lo, int dst_lo, 2541 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2542 assert(ireg == Op_VecS || // 32bit vector 2543 ((src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2544 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi), 2545 "no non-adjacent vector moves" ); 2546 if (masm) { 2547 switch (ireg) { 2548 case Op_VecS: // copy whole register 2549 case Op_VecD: 2550 case Op_VecX: 2551 #ifndef _LP64 2552 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2553 #else 2554 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2555 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2556 } else { 2557 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2558 } 2559 #endif 2560 break; 2561 case Op_VecY: 2562 #ifndef _LP64 2563 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2564 #else 2565 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2566 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2567 } else { 2568 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2569 } 2570 #endif 2571 break; 2572 case Op_VecZ: 2573 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2574 break; 2575 default: 2576 ShouldNotReachHere(); 2577 } 2578 #ifndef PRODUCT 2579 } else { 2580 switch (ireg) { 2581 case Op_VecS: 2582 case Op_VecD: 2583 case Op_VecX: 2584 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2585 break; 2586 case Op_VecY: 2587 case Op_VecZ: 2588 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2589 break; 2590 default: 2591 ShouldNotReachHere(); 2592 } 2593 #endif 2594 } 2595 } 2596 2597 void vec_spill_helper(C2_MacroAssembler *masm, bool is_load, 2598 int stack_offset, int reg, uint ireg, outputStream* st) { 2599 if (masm) { 2600 if (is_load) { 2601 switch (ireg) { 2602 case Op_VecS: 2603 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2604 break; 2605 case Op_VecD: 2606 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2607 break; 2608 case Op_VecX: 2609 #ifndef _LP64 2610 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2611 #else 2612 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2613 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2614 } else { 2615 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2616 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2617 } 2618 #endif 2619 break; 2620 case Op_VecY: 2621 #ifndef _LP64 2622 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2623 #else 2624 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2625 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2626 } else { 2627 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2628 __ 
vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2629 } 2630 #endif 2631 break; 2632 case Op_VecZ: 2633 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2634 break; 2635 default: 2636 ShouldNotReachHere(); 2637 } 2638 } else { // store 2639 switch (ireg) { 2640 case Op_VecS: 2641 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2642 break; 2643 case Op_VecD: 2644 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2645 break; 2646 case Op_VecX: 2647 #ifndef _LP64 2648 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2649 #else 2650 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2651 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2652 } 2653 else { 2654 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2655 } 2656 #endif 2657 break; 2658 case Op_VecY: 2659 #ifndef _LP64 2660 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2661 #else 2662 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2663 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2664 } 2665 else { 2666 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2667 } 2668 #endif 2669 break; 2670 case Op_VecZ: 2671 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2672 break; 2673 default: 2674 ShouldNotReachHere(); 2675 } 2676 } 2677 #ifndef PRODUCT 2678 } else { 2679 if (is_load) { 2680 switch (ireg) { 2681 case Op_VecS: 2682 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2683 break; 2684 case Op_VecD: 2685 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2686 break; 2687 case Op_VecX: 2688 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2689 break; 2690 case Op_VecY: 2691 case Op_VecZ: 2692 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2693 break; 2694 default: 2695 ShouldNotReachHere(); 2696 } 2697 } else { // store 2698 switch (ireg) { 2699 case Op_VecS: 2700 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2701 break; 2702 case Op_VecD: 2703 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2704 break; 2705 case Op_VecX: 2706 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2707 break; 2708 case Op_VecY: 2709 case Op_VecZ: 2710 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2711 break; 2712 default: 2713 ShouldNotReachHere(); 2714 } 2715 } 2716 #endif 2717 } 2718 } 2719 2720 template <class T> 2721 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2722 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2723 jvalue ele; 2724 switch (bt) { 2725 case T_BYTE: ele.b = con; break; 2726 case T_SHORT: ele.s = con; break; 2727 case T_INT: ele.i = con; break; 2728 case T_LONG: ele.j = con; break; 2729 case T_FLOAT: ele.f = con; break; 2730 case T_DOUBLE: ele.d = con; break; 2731 default: ShouldNotReachHere(); 2732 } 2733 for (int i = 0; i < len; i++) { 2734 val->append(ele); 2735 } 2736 return val; 2737 } 2738 2739 static inline jlong high_bit_set(BasicType bt) { 2740 switch (bt) { 2741 case T_BYTE: return 
0x8080808080808080; 2742 case T_SHORT: return 0x8000800080008000; 2743 case T_INT: return 0x8000000080000000; 2744 case T_LONG: return 0x8000000000000000; 2745 default: 2746 ShouldNotReachHere(); 2747 return 0; 2748 } 2749 } 2750 2751 #ifndef PRODUCT 2752 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2753 st->print("nop \t# %d bytes pad for loops and calls", _count); 2754 } 2755 #endif 2756 2757 void MachNopNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc*) const { 2758 __ nop(_count); 2759 } 2760 2761 uint MachNopNode::size(PhaseRegAlloc*) const { 2762 return _count; 2763 } 2764 2765 #ifndef PRODUCT 2766 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2767 st->print("# breakpoint"); 2768 } 2769 #endif 2770 2771 void MachBreakpointNode::emit(C2_MacroAssembler *masm, PhaseRegAlloc* ra_) const { 2772 __ int3(); 2773 } 2774 2775 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2776 return MachNode::size(ra_); 2777 } 2778 2779 %} 2780 2781 encode %{ 2782 2783 enc_class call_epilog %{ 2784 if (VerifyStackAtCalls) { 2785 // Check that stack depth is unchanged: find majik cookie on stack 2786 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2787 Label L; 2788 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2789 __ jccb(Assembler::equal, L); 2790 // Die if stack mismatch 2791 __ int3(); 2792 __ bind(L); 2793 } 2794 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2795 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2796 // Search for the corresponding projection, get the register and emit code that initializes it. 2797 uint con = (tf()->range_cc()->cnt() - 1); 2798 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2799 ProjNode* proj = fast_out(i)->as_Proj(); 2800 if (proj->_con == con) { 2801 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2802 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2803 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2804 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2805 __ testq(rax, rax); 2806 __ setb(Assembler::notZero, toReg); 2807 __ movzbl(toReg, toReg); 2808 if (reg->is_stack()) { 2809 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2810 __ movq(Address(rsp, st_off), toReg); 2811 } 2812 break; 2813 } 2814 } 2815 if (return_value_is_used()) { 2816 // An inline type is returned as fields in multiple registers. 2817 // Rax either contains an oop if the inline type is buffered or a pointer 2818 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2819 // if the lowest bit is set to allow C2 to use the oop after null checking.
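// (Worked out: when the tag bit is set, (rax & 1) - 1 == 0 and the mask
// below clears rax; when it is clear, (rax & 1) - 1 == -1 (all ones) and
// rax is left unchanged.)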
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}

%}

// Operands for bound floating-point register arguments
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format %{ %}
  interface(REG_INTER);
%}

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user-defined types which are used in
// instruction definitions.

// Vectors

// Dummy generic vector class. Should be used for all vector operands.
// Replaced with vec[SDXYZ] during post-selection pass.
operand vec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Dummy generic legacy vector class. Should be used for all legacy vector operands.
// Replaced with legVec[SDXYZ] during post-selection cleanup.
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
// runtime code generation via reg_class_dynamic.
operand legVec() %{
  constraint(ALLOC_IN_RC(dynamic));
  match(VecX);
  match(VecY);
  match(VecZ);
  match(VecS);
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecS() %{
  constraint(ALLOC_IN_RC(vectors_reg_legacy));
  match(VecS);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecD() %{
  constraint(ALLOC_IN_RC(vectord_reg_legacy));
  match(VecD);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecX() %{
  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
  match(VecX);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
operand vecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces legVec during post-selection cleanup. See above.
operand legVecY() %{
  constraint(ALLOC_IN_RC(vectory_reg_legacy));
  match(VecY);

  format %{ %}
  interface(REG_INTER);
%}

// Replaces vec during post-selection cleanup. See above.
2949 operand vecZ() %{ 2950 constraint(ALLOC_IN_RC(vectorz_reg)); 2951 match(VecZ); 2952 2953 format %{ %} 2954 interface(REG_INTER); 2955 %} 2956 2957 // Replaces legVec during post-selection cleanup. See above. 2958 operand legVecZ() %{ 2959 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2960 match(VecZ); 2961 2962 format %{ %} 2963 interface(REG_INTER); 2964 %} 2965 2966 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2967 2968 // ============================================================================ 2969 2970 instruct ShouldNotReachHere() %{ 2971 match(Halt); 2972 format %{ "stop\t# ShouldNotReachHere" %} 2973 ins_encode %{ 2974 if (is_reachable()) { 2975 __ stop(_halt_reason); 2976 } 2977 %} 2978 ins_pipe(pipe_slow); 2979 %} 2980 2981 // ============================================================================ 2982 2983 instruct addF_reg(regF dst, regF src) %{ 2984 predicate((UseSSE>=1) && (UseAVX == 0)); 2985 match(Set dst (AddF dst src)); 2986 2987 format %{ "addss $dst, $src" %} 2988 ins_cost(150); 2989 ins_encode %{ 2990 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2991 %} 2992 ins_pipe(pipe_slow); 2993 %} 2994 2995 instruct addF_mem(regF dst, memory src) %{ 2996 predicate((UseSSE>=1) && (UseAVX == 0)); 2997 match(Set dst (AddF dst (LoadF src))); 2998 2999 format %{ "addss $dst, $src" %} 3000 ins_cost(150); 3001 ins_encode %{ 3002 __ addss($dst$$XMMRegister, $src$$Address); 3003 %} 3004 ins_pipe(pipe_slow); 3005 %} 3006 3007 instruct addF_imm(regF dst, immF con) %{ 3008 predicate((UseSSE>=1) && (UseAVX == 0)); 3009 match(Set dst (AddF dst con)); 3010 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3011 ins_cost(150); 3012 ins_encode %{ 3013 __ addss($dst$$XMMRegister, $constantaddress($con)); 3014 %} 3015 ins_pipe(pipe_slow); 3016 %} 3017 3018 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3019 predicate(UseAVX > 0); 3020 match(Set dst (AddF src1 src2)); 3021 3022 format %{ "vaddss $dst, $src1, $src2" %} 3023 ins_cost(150); 3024 ins_encode %{ 3025 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3026 %} 3027 ins_pipe(pipe_slow); 3028 %} 3029 3030 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3031 predicate(UseAVX > 0); 3032 match(Set dst (AddF src1 (LoadF src2))); 3033 3034 format %{ "vaddss $dst, $src1, $src2" %} 3035 ins_cost(150); 3036 ins_encode %{ 3037 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3038 %} 3039 ins_pipe(pipe_slow); 3040 %} 3041 3042 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3043 predicate(UseAVX > 0); 3044 match(Set dst (AddF src con)); 3045 3046 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3047 ins_cost(150); 3048 ins_encode %{ 3049 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3050 %} 3051 ins_pipe(pipe_slow); 3052 %} 3053 3054 instruct addD_reg(regD dst, regD src) %{ 3055 predicate((UseSSE>=2) && (UseAVX == 0)); 3056 match(Set dst (AddD dst src)); 3057 3058 format %{ "addsd $dst, $src" %} 3059 ins_cost(150); 3060 ins_encode %{ 3061 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3062 %} 3063 ins_pipe(pipe_slow); 3064 %} 3065 3066 instruct addD_mem(regD dst, memory src) %{ 3067 predicate((UseSSE>=2) && (UseAVX == 0)); 3068 match(Set dst (AddD dst (LoadD src))); 3069 3070 format %{ "addsd $dst, $src" %} 3071 ins_cost(150); 3072 ins_encode %{ 3073 __ addsd($dst$$XMMRegister, $src$$Address); 3074 %} 3075 ins_pipe(pipe_slow); 3076 %} 
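// Note on the scalar FP rule pairs in this section: the SSE forms
// (predicate "UseAVX == 0") use the destructive two-operand encodings, so
// they match shapes like (Set dst (AddD dst src)) where the first input must
// already live in $dst, while the AVX forms use the non-destructive
// three-operand VEX encodings and leave both sources untouched. An
// illustrative sketch for "double r = a + b" (register assignments made up):
//   SSE: movsd  xmm0, a ; addsd xmm0, xmm1   // xmm0 is clobbered by the add
//   AVX: vaddsd xmm2, xmm0, xmm1             // xmm0 and xmm1 are preserved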
3077 3078 instruct addD_imm(regD dst, immD con) %{ 3079 predicate((UseSSE>=2) && (UseAVX == 0)); 3080 match(Set dst (AddD dst con)); 3081 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3082 ins_cost(150); 3083 ins_encode %{ 3084 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3085 %} 3086 ins_pipe(pipe_slow); 3087 %} 3088 3089 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3090 predicate(UseAVX > 0); 3091 match(Set dst (AddD src1 src2)); 3092 3093 format %{ "vaddsd $dst, $src1, $src2" %} 3094 ins_cost(150); 3095 ins_encode %{ 3096 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3097 %} 3098 ins_pipe(pipe_slow); 3099 %} 3100 3101 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3102 predicate(UseAVX > 0); 3103 match(Set dst (AddD src1 (LoadD src2))); 3104 3105 format %{ "vaddsd $dst, $src1, $src2" %} 3106 ins_cost(150); 3107 ins_encode %{ 3108 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3109 %} 3110 ins_pipe(pipe_slow); 3111 %} 3112 3113 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3114 predicate(UseAVX > 0); 3115 match(Set dst (AddD src con)); 3116 3117 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3118 ins_cost(150); 3119 ins_encode %{ 3120 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3121 %} 3122 ins_pipe(pipe_slow); 3123 %} 3124 3125 instruct subF_reg(regF dst, regF src) %{ 3126 predicate((UseSSE>=1) && (UseAVX == 0)); 3127 match(Set dst (SubF dst src)); 3128 3129 format %{ "subss $dst, $src" %} 3130 ins_cost(150); 3131 ins_encode %{ 3132 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3133 %} 3134 ins_pipe(pipe_slow); 3135 %} 3136 3137 instruct subF_mem(regF dst, memory src) %{ 3138 predicate((UseSSE>=1) && (UseAVX == 0)); 3139 match(Set dst (SubF dst (LoadF src))); 3140 3141 format %{ "subss $dst, $src" %} 3142 ins_cost(150); 3143 ins_encode %{ 3144 __ subss($dst$$XMMRegister, $src$$Address); 3145 %} 3146 ins_pipe(pipe_slow); 3147 %} 3148 3149 instruct subF_imm(regF dst, immF con) %{ 3150 predicate((UseSSE>=1) && (UseAVX == 0)); 3151 match(Set dst (SubF dst con)); 3152 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3153 ins_cost(150); 3154 ins_encode %{ 3155 __ subss($dst$$XMMRegister, $constantaddress($con)); 3156 %} 3157 ins_pipe(pipe_slow); 3158 %} 3159 3160 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3161 predicate(UseAVX > 0); 3162 match(Set dst (SubF src1 src2)); 3163 3164 format %{ "vsubss $dst, $src1, $src2" %} 3165 ins_cost(150); 3166 ins_encode %{ 3167 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3168 %} 3169 ins_pipe(pipe_slow); 3170 %} 3171 3172 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3173 predicate(UseAVX > 0); 3174 match(Set dst (SubF src1 (LoadF src2))); 3175 3176 format %{ "vsubss $dst, $src1, $src2" %} 3177 ins_cost(150); 3178 ins_encode %{ 3179 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3180 %} 3181 ins_pipe(pipe_slow); 3182 %} 3183 3184 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3185 predicate(UseAVX > 0); 3186 match(Set dst (SubF src con)); 3187 3188 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3189 ins_cost(150); 3190 ins_encode %{ 3191 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3192 %} 3193 ins_pipe(pipe_slow); 3194 %} 3195 3196 instruct subD_reg(regD dst, regD src) 
%{ 3197 predicate((UseSSE>=2) && (UseAVX == 0)); 3198 match(Set dst (SubD dst src)); 3199 3200 format %{ "subsd $dst, $src" %} 3201 ins_cost(150); 3202 ins_encode %{ 3203 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3204 %} 3205 ins_pipe(pipe_slow); 3206 %} 3207 3208 instruct subD_mem(regD dst, memory src) %{ 3209 predicate((UseSSE>=2) && (UseAVX == 0)); 3210 match(Set dst (SubD dst (LoadD src))); 3211 3212 format %{ "subsd $dst, $src" %} 3213 ins_cost(150); 3214 ins_encode %{ 3215 __ subsd($dst$$XMMRegister, $src$$Address); 3216 %} 3217 ins_pipe(pipe_slow); 3218 %} 3219 3220 instruct subD_imm(regD dst, immD con) %{ 3221 predicate((UseSSE>=2) && (UseAVX == 0)); 3222 match(Set dst (SubD dst con)); 3223 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3224 ins_cost(150); 3225 ins_encode %{ 3226 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3227 %} 3228 ins_pipe(pipe_slow); 3229 %} 3230 3231 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3232 predicate(UseAVX > 0); 3233 match(Set dst (SubD src1 src2)); 3234 3235 format %{ "vsubsd $dst, $src1, $src2" %} 3236 ins_cost(150); 3237 ins_encode %{ 3238 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3239 %} 3240 ins_pipe(pipe_slow); 3241 %} 3242 3243 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3244 predicate(UseAVX > 0); 3245 match(Set dst (SubD src1 (LoadD src2))); 3246 3247 format %{ "vsubsd $dst, $src1, $src2" %} 3248 ins_cost(150); 3249 ins_encode %{ 3250 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3251 %} 3252 ins_pipe(pipe_slow); 3253 %} 3254 3255 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3256 predicate(UseAVX > 0); 3257 match(Set dst (SubD src con)); 3258 3259 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3260 ins_cost(150); 3261 ins_encode %{ 3262 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3263 %} 3264 ins_pipe(pipe_slow); 3265 %} 3266 3267 instruct mulF_reg(regF dst, regF src) %{ 3268 predicate((UseSSE>=1) && (UseAVX == 0)); 3269 match(Set dst (MulF dst src)); 3270 3271 format %{ "mulss $dst, $src" %} 3272 ins_cost(150); 3273 ins_encode %{ 3274 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3275 %} 3276 ins_pipe(pipe_slow); 3277 %} 3278 3279 instruct mulF_mem(regF dst, memory src) %{ 3280 predicate((UseSSE>=1) && (UseAVX == 0)); 3281 match(Set dst (MulF dst (LoadF src))); 3282 3283 format %{ "mulss $dst, $src" %} 3284 ins_cost(150); 3285 ins_encode %{ 3286 __ mulss($dst$$XMMRegister, $src$$Address); 3287 %} 3288 ins_pipe(pipe_slow); 3289 %} 3290 3291 instruct mulF_imm(regF dst, immF con) %{ 3292 predicate((UseSSE>=1) && (UseAVX == 0)); 3293 match(Set dst (MulF dst con)); 3294 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3295 ins_cost(150); 3296 ins_encode %{ 3297 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3298 %} 3299 ins_pipe(pipe_slow); 3300 %} 3301 3302 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3303 predicate(UseAVX > 0); 3304 match(Set dst (MulF src1 src2)); 3305 3306 format %{ "vmulss $dst, $src1, $src2" %} 3307 ins_cost(150); 3308 ins_encode %{ 3309 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3310 %} 3311 ins_pipe(pipe_slow); 3312 %} 3313 3314 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3315 predicate(UseAVX > 0); 3316 match(Set dst (MulF src1 (LoadF src2))); 3317 3318 format %{ "vmulss $dst, $src1, $src2" %} 3319 
ins_cost(150); 3320 ins_encode %{ 3321 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3322 %} 3323 ins_pipe(pipe_slow); 3324 %} 3325 3326 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3327 predicate(UseAVX > 0); 3328 match(Set dst (MulF src con)); 3329 3330 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3331 ins_cost(150); 3332 ins_encode %{ 3333 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3334 %} 3335 ins_pipe(pipe_slow); 3336 %} 3337 3338 instruct mulD_reg(regD dst, regD src) %{ 3339 predicate((UseSSE>=2) && (UseAVX == 0)); 3340 match(Set dst (MulD dst src)); 3341 3342 format %{ "mulsd $dst, $src" %} 3343 ins_cost(150); 3344 ins_encode %{ 3345 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3346 %} 3347 ins_pipe(pipe_slow); 3348 %} 3349 3350 instruct mulD_mem(regD dst, memory src) %{ 3351 predicate((UseSSE>=2) && (UseAVX == 0)); 3352 match(Set dst (MulD dst (LoadD src))); 3353 3354 format %{ "mulsd $dst, $src" %} 3355 ins_cost(150); 3356 ins_encode %{ 3357 __ mulsd($dst$$XMMRegister, $src$$Address); 3358 %} 3359 ins_pipe(pipe_slow); 3360 %} 3361 3362 instruct mulD_imm(regD dst, immD con) %{ 3363 predicate((UseSSE>=2) && (UseAVX == 0)); 3364 match(Set dst (MulD dst con)); 3365 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3366 ins_cost(150); 3367 ins_encode %{ 3368 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3369 %} 3370 ins_pipe(pipe_slow); 3371 %} 3372 3373 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3374 predicate(UseAVX > 0); 3375 match(Set dst (MulD src1 src2)); 3376 3377 format %{ "vmulsd $dst, $src1, $src2" %} 3378 ins_cost(150); 3379 ins_encode %{ 3380 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3381 %} 3382 ins_pipe(pipe_slow); 3383 %} 3384 3385 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3386 predicate(UseAVX > 0); 3387 match(Set dst (MulD src1 (LoadD src2))); 3388 3389 format %{ "vmulsd $dst, $src1, $src2" %} 3390 ins_cost(150); 3391 ins_encode %{ 3392 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3393 %} 3394 ins_pipe(pipe_slow); 3395 %} 3396 3397 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3398 predicate(UseAVX > 0); 3399 match(Set dst (MulD src con)); 3400 3401 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3402 ins_cost(150); 3403 ins_encode %{ 3404 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3405 %} 3406 ins_pipe(pipe_slow); 3407 %} 3408 3409 instruct divF_reg(regF dst, regF src) %{ 3410 predicate((UseSSE>=1) && (UseAVX == 0)); 3411 match(Set dst (DivF dst src)); 3412 3413 format %{ "divss $dst, $src" %} 3414 ins_cost(150); 3415 ins_encode %{ 3416 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3417 %} 3418 ins_pipe(pipe_slow); 3419 %} 3420 3421 instruct divF_mem(regF dst, memory src) %{ 3422 predicate((UseSSE>=1) && (UseAVX == 0)); 3423 match(Set dst (DivF dst (LoadF src))); 3424 3425 format %{ "divss $dst, $src" %} 3426 ins_cost(150); 3427 ins_encode %{ 3428 __ divss($dst$$XMMRegister, $src$$Address); 3429 %} 3430 ins_pipe(pipe_slow); 3431 %} 3432 3433 instruct divF_imm(regF dst, immF con) %{ 3434 predicate((UseSSE>=1) && (UseAVX == 0)); 3435 match(Set dst (DivF dst con)); 3436 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3437 ins_cost(150); 3438 ins_encode %{ 3439 __ divss($dst$$XMMRegister, $constantaddress($con)); 3440 
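    // A note on the pattern above (it applies to all the *_imm rules in this
    // section): $constantaddress($con) emits the literal into the method's
    // constant table and returns an address into that table, so the constant
    // is consumed as a memory operand of the arithmetic instruction instead
    // of being materialized in a register first.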
%} 3441 ins_pipe(pipe_slow); 3442 %} 3443 3444 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3445 predicate(UseAVX > 0); 3446 match(Set dst (DivF src1 src2)); 3447 3448 format %{ "vdivss $dst, $src1, $src2" %} 3449 ins_cost(150); 3450 ins_encode %{ 3451 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3452 %} 3453 ins_pipe(pipe_slow); 3454 %} 3455 3456 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3457 predicate(UseAVX > 0); 3458 match(Set dst (DivF src1 (LoadF src2))); 3459 3460 format %{ "vdivss $dst, $src1, $src2" %} 3461 ins_cost(150); 3462 ins_encode %{ 3463 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3464 %} 3465 ins_pipe(pipe_slow); 3466 %} 3467 3468 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3469 predicate(UseAVX > 0); 3470 match(Set dst (DivF src con)); 3471 3472 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3473 ins_cost(150); 3474 ins_encode %{ 3475 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3476 %} 3477 ins_pipe(pipe_slow); 3478 %} 3479 3480 instruct divD_reg(regD dst, regD src) %{ 3481 predicate((UseSSE>=2) && (UseAVX == 0)); 3482 match(Set dst (DivD dst src)); 3483 3484 format %{ "divsd $dst, $src" %} 3485 ins_cost(150); 3486 ins_encode %{ 3487 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3488 %} 3489 ins_pipe(pipe_slow); 3490 %} 3491 3492 instruct divD_mem(regD dst, memory src) %{ 3493 predicate((UseSSE>=2) && (UseAVX == 0)); 3494 match(Set dst (DivD dst (LoadD src))); 3495 3496 format %{ "divsd $dst, $src" %} 3497 ins_cost(150); 3498 ins_encode %{ 3499 __ divsd($dst$$XMMRegister, $src$$Address); 3500 %} 3501 ins_pipe(pipe_slow); 3502 %} 3503 3504 instruct divD_imm(regD dst, immD con) %{ 3505 predicate((UseSSE>=2) && (UseAVX == 0)); 3506 match(Set dst (DivD dst con)); 3507 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3508 ins_cost(150); 3509 ins_encode %{ 3510 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3511 %} 3512 ins_pipe(pipe_slow); 3513 %} 3514 3515 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3516 predicate(UseAVX > 0); 3517 match(Set dst (DivD src1 src2)); 3518 3519 format %{ "vdivsd $dst, $src1, $src2" %} 3520 ins_cost(150); 3521 ins_encode %{ 3522 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3523 %} 3524 ins_pipe(pipe_slow); 3525 %} 3526 3527 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3528 predicate(UseAVX > 0); 3529 match(Set dst (DivD src1 (LoadD src2))); 3530 3531 format %{ "vdivsd $dst, $src1, $src2" %} 3532 ins_cost(150); 3533 ins_encode %{ 3534 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3535 %} 3536 ins_pipe(pipe_slow); 3537 %} 3538 3539 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3540 predicate(UseAVX > 0); 3541 match(Set dst (DivD src con)); 3542 3543 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3544 ins_cost(150); 3545 ins_encode %{ 3546 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3547 %} 3548 ins_pipe(pipe_slow); 3549 %} 3550 3551 instruct absF_reg(regF dst) %{ 3552 predicate((UseSSE>=1) && (UseAVX == 0)); 3553 match(Set dst (AbsF dst)); 3554 ins_cost(150); 3555 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3556 ins_encode %{ 3557 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3558 %} 3559 ins_pipe(pipe_slow); 3560 %} 3561 3562 instruct 
absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vlen_enc);
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtss instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the rule where the input is pre-loaded
// into the dst register is defined below.
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// The sqrtsd instruction needs its destination register to be pre-initialized
// for best performance. Therefore, only the rule where the input is pre-loaded
// into the dst register is defined below.
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __
sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3669 %} 3670 ins_pipe(pipe_slow); 3671 %} 3672 3673 instruct convF2HF_reg_reg(rRegI dst, vlRegF src, vlRegF tmp) %{ 3674 effect(TEMP tmp); 3675 match(Set dst (ConvF2HF src)); 3676 ins_cost(125); 3677 format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP"%} 3678 ins_encode %{ 3679 __ flt_to_flt16($dst$$Register, $src$$XMMRegister, $tmp$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{ 3685 predicate((UseAVX > 2) && VM_Version::supports_avx512vl()); 3686 effect(TEMP ktmp, TEMP rtmp); 3687 match(Set mem (StoreC mem (ConvF2HF src))); 3688 format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %} 3689 ins_encode %{ 3690 __ movl($rtmp$$Register, 0x1); 3691 __ kmovwl($ktmp$$KRegister, $rtmp$$Register); 3692 __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit); 3693 %} 3694 ins_pipe( pipe_slow ); 3695 %} 3696 3697 instruct vconvF2HF(vec dst, vec src) %{ 3698 match(Set dst (VectorCastF2HF src)); 3699 format %{ "vector_conv_F2HF $dst $src" %} 3700 ins_encode %{ 3701 int vlen_enc = vector_length_encoding(this, $src); 3702 __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc); 3703 %} 3704 ins_pipe( pipe_slow ); 3705 %} 3706 3707 instruct vconvF2HF_mem_reg(memory mem, vec src) %{ 3708 match(Set mem (StoreVector mem (VectorCastF2HF src))); 3709 format %{ "vcvtps2ph $mem,$src" %} 3710 ins_encode %{ 3711 int vlen_enc = vector_length_encoding(this, $src); 3712 __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc); 3713 %} 3714 ins_pipe( pipe_slow ); 3715 %} 3716 3717 instruct convHF2F_reg_reg(vlRegF dst, rRegI src) %{ 3718 match(Set dst (ConvHF2F src)); 3719 format %{ "vcvtph2ps $dst,$src" %} 3720 ins_encode %{ 3721 __ flt16_to_flt($dst$$XMMRegister, $src$$Register); 3722 %} 3723 ins_pipe( pipe_slow ); 3724 %} 3725 3726 instruct vconvHF2F_reg_mem(vec dst, memory mem) %{ 3727 match(Set dst (VectorCastHF2F (LoadVector mem))); 3728 format %{ "vcvtph2ps $dst,$mem" %} 3729 ins_encode %{ 3730 int vlen_enc = vector_length_encoding(this); 3731 __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc); 3732 %} 3733 ins_pipe( pipe_slow ); 3734 %} 3735 3736 instruct vconvHF2F(vec dst, vec src) %{ 3737 match(Set dst (VectorCastHF2F src)); 3738 ins_cost(125); 3739 format %{ "vector_conv_HF2F $dst,$src" %} 3740 ins_encode %{ 3741 int vlen_enc = vector_length_encoding(this); 3742 __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 3743 %} 3744 ins_pipe( pipe_slow ); 3745 %} 3746 3747 // ---------------------------------------- VectorReinterpret ------------------------------------ 3748 instruct reinterpret_mask(kReg dst) %{ 3749 predicate(n->bottom_type()->isa_vectmask() && 3750 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3751 match(Set dst (VectorReinterpret dst)); 3752 ins_cost(125); 3753 format %{ "vector_reinterpret $dst\t!" 
%} 3754 ins_encode %{ 3755 // empty 3756 %} 3757 ins_pipe( pipe_slow ); 3758 %} 3759 3760 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3761 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3762 n->bottom_type()->isa_vectmask() && 3763 n->in(1)->bottom_type()->isa_vectmask() && 3764 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3765 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3766 match(Set dst (VectorReinterpret src)); 3767 effect(TEMP xtmp); 3768 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3769 ins_encode %{ 3770 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3771 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3772 assert(src_sz == dst_sz , "src and dst size mismatch"); 3773 int vlen_enc = vector_length_encoding(src_sz); 3774 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3775 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3776 %} 3777 ins_pipe( pipe_slow ); 3778 %} 3779 3780 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3781 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3782 n->bottom_type()->isa_vectmask() && 3783 n->in(1)->bottom_type()->isa_vectmask() && 3784 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3785 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3786 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3787 match(Set dst (VectorReinterpret src)); 3788 effect(TEMP xtmp); 3789 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3790 ins_encode %{ 3791 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3792 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3793 assert(src_sz == dst_sz , "src and dst size mismatch"); 3794 int vlen_enc = vector_length_encoding(src_sz); 3795 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3796 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3797 %} 3798 ins_pipe( pipe_slow ); 3799 %} 3800 3801 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3802 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3803 n->bottom_type()->isa_vectmask() && 3804 n->in(1)->bottom_type()->isa_vectmask() && 3805 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3806 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3807 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3808 match(Set dst (VectorReinterpret src)); 3809 effect(TEMP xtmp); 3810 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3811 ins_encode %{ 3812 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3813 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3814 assert(src_sz == dst_sz , "src and dst size mismatch"); 3815 int vlen_enc = vector_length_encoding(src_sz); 3816 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3817 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3818 %} 3819 ins_pipe( pipe_slow ); 3820 %} 3821 3822 instruct reinterpret(vec dst) %{ 3823 predicate(!n->bottom_type()->isa_vectmask() && 3824 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3825 match(Set dst (VectorReinterpret dst)); 3826 ins_cost(125); 3827 format %{ "vector_reinterpret $dst\t!" %} 3828 ins_encode %{ 3829 // empty 3830 %} 3831 ins_pipe( pipe_slow ); 3832 %} 3833 3834 instruct reinterpret_expand(vec dst, vec src) %{ 3835 predicate(UseAVX == 0 && 3836 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3837 match(Set dst (VectorReinterpret src)); 3838 ins_cost(125); 3839 effect(TEMP dst); 3840 format %{ "vector_reinterpret_expand $dst,$src" %} 3841 ins_encode %{ 3842 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3843 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3844 3845 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3846 if (src_vlen_in_bytes == 4) { 3847 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3848 } else { 3849 assert(src_vlen_in_bytes == 8, ""); 3850 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3851 } 3852 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3853 %} 3854 ins_pipe( pipe_slow ); 3855 %} 3856 3857 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3858 predicate(UseAVX > 0 && 3859 !n->bottom_type()->isa_vectmask() && 3860 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3861 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3862 match(Set dst (VectorReinterpret src)); 3863 ins_cost(125); 3864 format %{ "vector_reinterpret_expand $dst,$src" %} 3865 ins_encode %{ 3866 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3867 %} 3868 ins_pipe( pipe_slow ); 3869 %} 3870 3871 3872 instruct vreinterpret_expand(legVec dst, vec src) %{ 3873 predicate(UseAVX > 0 && 3874 !n->bottom_type()->isa_vectmask() && 3875 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3876 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3877 match(Set dst (VectorReinterpret src)); 3878 ins_cost(125); 3879 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3880 ins_encode %{ 3881 switch (Matcher::vector_length_in_bytes(this, $src)) { 3882 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3883 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3884 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3885 default: ShouldNotReachHere(); 3886 } 3887 %} 3888 ins_pipe( pipe_slow ); 3889 %} 3890 3891 instruct reinterpret_shrink(vec dst, legVec src) %{ 3892 predicate(!n->bottom_type()->isa_vectmask() && 3893 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3894 match(Set dst (VectorReinterpret src)); 3895 ins_cost(125); 3896 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3897 ins_encode %{ 3898 switch (Matcher::vector_length_in_bytes(this)) { 3899 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3900 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3901 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3902 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3903 default: ShouldNotReachHere(); 3904 } 3905 %} 3906 ins_pipe( pipe_slow ); 3907 %} 3908 3909 // ---------------------------------------------------------------------------------------------------- 3910 3911 #ifdef _LP64 3912 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3913 match(Set dst (RoundDoubleMode src rmode)); 3914 format %{ "roundsd $dst,$src" %} 3915 ins_cost(150); 3916 ins_encode %{ 3917 assert(UseSSE >= 4, "required"); 3918 if ((UseAVX == 0) && ($dst$$XMMRegister != $src$$XMMRegister)) { 3919 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 3920 } 3921 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3922 %} 3923 ins_pipe(pipe_slow); 3924 %} 3925 3926 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3927 match(Set dst (RoundDoubleMode con rmode)); 3928 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3929 ins_cost(150); 3930 ins_encode %{ 3931 assert(UseSSE >= 4, "required"); 3932 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3933 %} 3934 ins_pipe(pipe_slow); 3935 %} 3936 3937 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3938 predicate(Matcher::vector_length(n) < 8); 3939 match(Set dst (RoundDoubleModeV src rmode)); 3940 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3941 ins_encode %{ 3942 assert(UseAVX > 0, "required"); 3943 int vlen_enc = vector_length_encoding(this); 3944 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3945 %} 3946 ins_pipe( pipe_slow ); 3947 %} 3948 3949 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3950 predicate(Matcher::vector_length(n) == 8); 3951 match(Set dst (RoundDoubleModeV src rmode)); 3952 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3953 ins_encode %{ 3954 assert(UseAVX > 2, "required"); 3955 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3961 predicate(Matcher::vector_length(n) < 8); 3962 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3963 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3964 ins_encode %{ 3965 assert(UseAVX > 0, "required"); 3966 int vlen_enc = vector_length_encoding(this); 3967 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3968 %} 3969 ins_pipe( pipe_slow ); 3970 %} 3971 3972 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3973 predicate(Matcher::vector_length(n) == 8); 3974 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3975 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3976 ins_encode %{ 3977 assert(UseAVX > 2, "required"); 3978 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3979 %} 3980 ins_pipe( pipe_slow ); 3981 %} 3982 #endif // _LP64 3983 3984 instruct onspinwait() %{ 3985 match(OnSpinWait); 3986 ins_cost(200); 3987 3988 format %{ 3989 $$template 3990 $$emit$$"pause\t! 
membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "Needs FMA instruction support.");
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// ====================VECTOR INSTRUCTIONS=====================================

// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
instruct MoveVec2Leg(legVec dst, vec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct MoveLeg2Vec(vec dst, legVec src) %{
  match(Set dst src);
  format %{ "" %}
  ins_encode %{
    ShouldNotReachHere();
  %}
  ins_pipe( fpu_reg_reg );
%}

// ============================================================================

// Load vectors generic operand pattern
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this));
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors generic operand pattern.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (Matcher::vector_length_in_bytes(this, $src)) {
    case 4:  __ movdl    ($mem$$Address, $src$$XMMRegister); break;
    case 8:  __ movq     ($mem$$Address, $src$$XMMRegister); break;
    case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
    case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
    case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
    default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- Gather ------------------------------------

// Gather BYTE, SHORT, INT, LONG, FLOAT, DOUBLE

instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
  predicate(!VM_Version::supports_avx512vl() && !is_subword_type(Matcher::vector_element_basic_type(n)) &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP mask);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    __ vpcmpeqd($mask$$XMMRegister, $mask$$XMMRegister, $mask$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ kxnorwl($ktmp$$KRegister, $ktmp$$KRegister, $ktmp$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64) &&
            !is_subword_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: The gather instruction partially updates the opmask register used
    // for predication, hence the mask operand is moved to a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_subwordLE8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegI rtmp) %{
  predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGather mem (Binary idx_base offset)));
  effect(TEMP tmp, TEMP rtmp);
  format %{ "vector_gatherLE8 $dst, $mem, $idx_base\t!
using $tmp and $rtmp as TEMP" %} 4138 ins_encode %{ 4139 int vlen_enc = vector_length_encoding(this); 4140 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4141 __ lea($tmp$$Register, $mem$$Address); 4142 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp$$Register, vlen_enc); 4143 %} 4144 ins_pipe( pipe_slow ); 4145 %} 4146 4147 instruct vgather_subwordGT8B(vec dst, memory mem, rRegP idx_base, immI_0 offset, rRegP tmp, rRegP idx_base_temp, 4148 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4149 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4150 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4151 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4152 format %{ "vector_gatherGT8 $dst, $mem, $idx_base\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4153 ins_encode %{ 4154 int vlen_enc = vector_length_encoding(this); 4155 int vector_len = Matcher::vector_length(this); 4156 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4157 __ lea($tmp$$Register, $mem$$Address); 4158 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4159 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, noreg, $xtmp1$$XMMRegister, 4160 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4161 %} 4162 ins_pipe( pipe_slow ); 4163 %} 4164 4165 instruct vgather_subwordLE8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegI rtmp, rFlagsReg cr) %{ 4166 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4167 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4168 effect(TEMP tmp, TEMP rtmp, KILL cr); 4169 format %{ "vector_gatherLE8_off $dst, $mem, $idx_base, $offset\t! using $tmp and $rtmp as TEMP" %} 4170 ins_encode %{ 4171 int vlen_enc = vector_length_encoding(this); 4172 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4173 __ lea($tmp$$Register, $mem$$Address); 4174 __ vgather8b_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, $rtmp$$Register, vlen_enc); 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 4180 instruct vgather_subwordGT8B_off(vec dst, memory mem, rRegP idx_base, rRegI offset, rRegP tmp, rRegP idx_base_temp, 4181 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI length, rFlagsReg cr) %{ 4182 predicate(is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4183 match(Set dst (LoadVectorGather mem (Binary idx_base offset))); 4184 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP length, KILL cr); 4185 format %{ "vector_gatherGT8_off $dst, $mem, $idx_base, $offset\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp and $length as TEMP" %} 4186 ins_encode %{ 4187 int vlen_enc = vector_length_encoding(this); 4188 int vector_len = Matcher::vector_length(this); 4189 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4190 __ lea($tmp$$Register, $mem$$Address); 4191 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4192 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, noreg, $xtmp1$$XMMRegister, 4193 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, noreg, $length$$Register, vector_len, vlen_enc); 4194 %} 4195 ins_pipe( pipe_slow ); 4196 %} 4197 4198 4199 #ifdef _LP64 4200 instruct vgather_masked_subwordLE8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4201 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4202 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4203 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4204 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4205 ins_encode %{ 4206 int vlen_enc = vector_length_encoding(this); 4207 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4208 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4209 __ lea($tmp$$Register, $mem$$Address); 4210 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4211 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4212 %} 4213 ins_pipe( pipe_slow ); 4214 %} 4215 4216 instruct vgather_masked_subwordGT8B_avx3(vec dst, memory mem, rRegP idx_base, immI_0 offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4217 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4218 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4219 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4220 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4221 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4222 ins_encode %{ 4223 int vlen_enc = vector_length_encoding(this); 4224 int vector_len = Matcher::vector_length(this); 4225 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4226 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4227 __ lea($tmp$$Register, $mem$$Address); 4228 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4229 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4230 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister, 4231 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4232 %} 4233 ins_pipe( pipe_slow ); 4234 %} 4235 4236 instruct vgather_masked_subwordLE8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegL mask_idx, rRegP tmp, rRegI rtmp, rRegL rtmp2, rFlagsReg cr) %{ 4237 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4238 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4239 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4240 format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4241 ins_encode %{ 4242 int vlen_enc = vector_length_encoding(this); 4243 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4244 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4245 __ lea($tmp$$Register, $mem$$Address); 4246 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4247 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register, 4248 $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4249 %} 4250 ins_pipe( pipe_slow ); 4251 %} 4252 4253 instruct vgather_masked_subwordGT8B_off_avx3(vec dst, memory mem, rRegP idx_base, rRegI offset, kReg mask, rRegP tmp, rRegP idx_base_temp, 4254 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegL rtmp2, rRegL mask_idx, rRegI length, rFlagsReg cr) %{ 4255 predicate(VM_Version::supports_avx512bw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4256 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4257 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4258 format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! 
using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %} 4259 ins_encode %{ 4260 int vlen_enc = vector_length_encoding(this); 4261 int vector_len = Matcher::vector_length(this); 4262 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4263 __ xorq($mask_idx$$Register, $mask_idx$$Register); 4264 __ lea($tmp$$Register, $mem$$Address); 4265 __ movptr($idx_base_temp$$Register, $idx_base$$Register); 4266 __ kmovql($rtmp2$$Register, $mask$$KRegister); 4267 __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister, 4268 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc); 4269 %} 4270 ins_pipe( pipe_slow ); 4271 %} 4272 4273 instruct vgather_masked_subwordLE8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{ 4274 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8); 4275 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4276 effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr); 4277 format %{ "vector_masked_gatherLE8 $dst, $mem, $idx_base, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %} 4278 ins_encode %{ 4279 int vlen_enc = vector_length_encoding(this); 4280 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4281 __ lea($tmp$$Register, $mem$$Address); 4282 __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc); 4283 if (elem_bt == T_SHORT) { 4284 __ movl($mask_idx$$Register, 0x55555555); 4285 __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register); 4286 } 4287 __ xorl($mask_idx$$Register, $mask_idx$$Register); 4288 __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, noreg, $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc); 4289 %} 4290 ins_pipe( pipe_slow ); 4291 %} 4292 4293 instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp, 4294 vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{ 4295 predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8); 4296 match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset)))); 4297 effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr); 4298 format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! 
instruct vgather_masked_subwordGT8B_avx2(vec dst, memory mem, rRegP idx_base, immI_0 offset, vec mask, rRegP tmp, rRegP idx_base_temp,
                                         vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked $dst, $mem, $idx_base, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, noreg, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordLE8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegI mask_idx, rRegP tmp, rRegI rtmp, rRegI rtmp2, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) <= 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP mask_idx, TEMP tmp, TEMP rtmp, TEMP rtmp2, KILL cr);
  format %{ "vector_masked_gatherLE8_off $dst, $mem, $idx_base, $offset, $mask\t! using $mask_idx, $tmp, $rtmp and $rtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ lea($tmp$$Register, $mem$$Address);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather8b_masked_offset(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base$$Register, $offset$$Register,
                               $rtmp2$$Register, $mask_idx$$Register, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vgather_masked_subwordGT8B_off_avx2(vec dst, memory mem, rRegP idx_base, rRegI offset, vec mask, rRegP tmp, rRegP idx_base_temp,
                                             vec xtmp1, vec xtmp2, vec xtmp3, rRegI rtmp, rRegI rtmp2, rRegI mask_idx, rRegI length, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vlbw() && is_subword_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (LoadVectorGatherMasked mem (Binary idx_base (Binary mask offset))));
  effect(TEMP_DEF dst, TEMP tmp, TEMP idx_base_temp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, TEMP rtmp2, TEMP mask_idx, TEMP length, KILL cr);
  format %{ "vector_gatherGT8_masked_off $dst, $mem, $idx_base, $offset, $mask\t! using $tmp, $idx_base_temp, $xtmp1, $xtmp2, $xtmp3, $rtmp, $rtmp2, $mask_idx and $length as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    int vector_len = Matcher::vector_length(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ movptr($idx_base_temp$$Register, $idx_base$$Register);
    __ vpmovmskb($rtmp2$$Register, $mask$$XMMRegister, vlen_enc);
    if (elem_bt == T_SHORT) {
      __ movl($mask_idx$$Register, 0x55555555);
      __ pextl($rtmp2$$Register, $rtmp2$$Register, $mask_idx$$Register);
    }
    __ xorl($mask_idx$$Register, $mask_idx$$Register);
    __ vgather_subword(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx_base_temp$$Register, $offset$$Register, $rtmp2$$Register, $xtmp1$$XMMRegister,
                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, $mask_idx$$Register, $length$$Register, vector_len, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
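
// The scatter rule above and the masked variant below both reduce to the
// scalar loop sketched here (illustrative only). evscatter consumes an opmask
// and clears the bit of every lane it completes, which is why a live mask
// register must not be handed to it directly:
//
//   for (int i = 0; i < vlen; i++) {
//     if ((mask >> i) & 1) {
//       base[idx[i]] = src[i];    // per-lane indexed store
//     }
//   }
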
instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register
    // used for predication, the mask operand is copied into a temporary first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (UseAVX >= 2) {
      int vlen_enc = vector_length_encoding(this);
      if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
        __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 16) {
        assert(vlen == 16, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (Replicate (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateS=======================================

instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate src));
  format %{ "replicateS $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (UseAVX >= 2) {
      if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
        assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
        __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
      } else {
        __ movdl($dst$$XMMRegister, $src$$Register);
        __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      }
    } else {
      assert(UseAVX < 2, "");
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
      if (vlen >= 8) {
        assert(vlen == 8, "");
        __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplS_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2 && Matcher::vector_element_basic_type(n) == T_SHORT);
  match(Set dst (Replicate (LoadS mem)));
  format %{ "replicateS $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateI=======================================

instruct ReplI_reg(vec dst, rRegI src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate src));
  format %{ "replicateI $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (Replicate (LoadI mem)));
  format %{ "replicateI $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_avx()) {
      __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $mem$$Address);
      __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplI_imm(vec dst, immI con) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "replicateI $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this),
                                            vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant,
                                                           (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 4 : 8) : 8) /
                                                           type2aelembytes(Matcher::vector_element_basic_type(this))));
    BasicType bt = Matcher::vector_element_basic_type(this);
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to be vector
instruct ReplI_zero(vec dst, immI_0 zero) %{
  predicate(Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(UseSSE >= 2 && Matcher::is_non_long_integral_vector(n));
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateL=======================================

#ifdef _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, rRegL src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else if (VM_Version::supports_avx2()) {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdq($dst$$XMMRegister, $src$$Register);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}
#else // _LP64
// Replicate long (8 byte) scalar to be vector
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
  predicate(Matcher::vector_length(n) <= 4 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
      int vlen_enc = Assembler::AVX_256bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
  predicate(Matcher::vector_length(n) == 8 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate src));
  effect(TEMP dst, USE src, TEMP tmp);
  format %{ "replicateL $dst,$src" %}
  ins_encode %{
    if (VM_Version::supports_avx512vl()) {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
      __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
    } else {
      int vlen_enc = Assembler::AVX_512bit;
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
      __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
      __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

instruct ReplL_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate (LoadL mem)));
  format %{ "replicateL $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_avx2()) {
      __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else if (VM_Version::supports_sse3()) {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    } else {
      __ movq($dst$$XMMRegister, $mem$$Address);
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
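// vreplicate_imm(T_LONG, con, 1) emits a single 8-byte copy of the immediate
// into the constant table; load_constant_vector then broadcasts it out to the
// full vector width. Sketch of the resulting semantics (illustrative only):
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = con;    // every lane holds the replicated immediate
//   }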
instruct ReplL_imm(vec dst, immL con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "replicateL $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplL_zero(vec dst, immL0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate zero));
  format %{ "replicateL $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplL_M1(vec dst, immL_M1 con) %{
  predicate(UseSSE >= 2 && Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (Replicate con));
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateF=======================================

instruct vReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 4) {
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 8, "sanity");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_reg(vec dst, vlRegF src) %{
  predicate(UseAVX == 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate src));
  format %{ "replicateF $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_mem(vec dst, memory mem) %{
  predicate(UseAVX > 0 && Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate (LoadF mem)));
  format %{ "replicateF $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Replicate float scalar immediate to be vector by loading from const table.
instruct ReplF_imm(vec dst, immF con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate con));
  format %{ "replicateF $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant,
                                                                    VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplF_zero(vec dst, immF0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (Replicate zero));
  format %{ "replicateF $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================ReplicateD=======================================

// Replicate double (8 bytes) scalar to be vector
instruct vReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen <= 2) {
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
    } else if (VM_Version::supports_avx2()) {
      __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2
    } else {
      assert(vlen == 4, "sanity");
      __ movddup($dst$$XMMRegister, $src$$XMMRegister);
      __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_reg(vec dst, vlRegD src) %{
  predicate(UseSSE < 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate src));
  format %{ "replicateD $dst,$src" %}
  ins_encode %{
    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_mem(vec dst, memory mem) %{
  predicate(UseSSE >= 3 && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate (LoadD mem)));
  format %{ "replicateD $dst,$mem" %}
  ins_encode %{
    if (Matcher::vector_length(this) >= 4) {
      int vlen_enc = vector_length_encoding(this);
      __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc);
    } else {
      __ movddup($dst$$XMMRegister, $mem$$Address);
    }
  %}
  ins_pipe( pipe_slow );
%}
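
// All of the Replicate rules in this block implement the same element-wise
// semantics and differ only in instruction selection (movddup within 128 bits,
// vbroadcastsd for wider vectors, pshufd 0x44 before SSE3). Scalar sketch
// (illustrative only):
//
//   for (int i = 0; i < vlen; i++) {
//     dst[i] = src;    // broadcast the scalar into every lane
//   }
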
// Replicate double (8 byte) scalar immediate to be vector by loading from const table.
instruct ReplD_imm(vec dst, immD con) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate con));
  format %{ "replicateD $dst,$con" %}
  ins_encode %{
    InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1));
    int vlen = Matcher::vector_length_in_bytes(this);
    __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen);
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplD_zero(vec dst, immD0 zero) %{
  predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (Replicate zero));
  format %{ "replicateD $dst,$zero" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) {
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

// ====================VECTOR INSERT=======================================

instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif
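
// The 256/512-bit VectorInsert rules above all share one scheme: the constant
// element index is split into a 128-bit lane number (y_idx) and a position
// within that lane (x_idx); the lane is extracted, patched with
// pinsr*/insertps, and written back. Sketch of the index math for an element
// size of elem_sz bytes (illustrative only):
//
//   int elem_per_lane = 16 / elem_sz;   // elements per 128-bit lane
//   int x_idx = idx % elem_per_lane;    // i.e. idx & right_n_bits(log2epr)
//   int y_idx = idx / elem_per_lane;    // i.e. idx >> log2epr
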
// ====================REDUCTION ARITHMETIC=======================================

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2F(regF dst, regF src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_float $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4F(regF dst, regF src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8F(regF dst, regF src1, vec src2, vec vtmp1, vec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction16F(regF dst, regF src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for floats. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 16); // src2
  match(Set dst (AddReductionVF src1 src2));
  match(Set dst (MulReductionVF src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction2D(regD dst, regD src1, vec src2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 2); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst);
  format %{ "vector_reduction_double $dst,$src1,$src2 ;" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction4D(regD dst, regD src1, vec src2, vec vtmp) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 4); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct unordered_reduction8D(regD dst, regD src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  // Non-strictly ordered floating-point add/mul reduction for doubles. This rule is
  // intended for the VectorAPI (which allows for non-strictly ordered add/mul reduction).
  // src1 contains reduction identity
  predicate(!n->as_Reduction()->requires_strict_order() && Matcher::vector_length(n->in(2)) == 8); // src2
  match(Set dst (AddReductionVD src1 src2));
  match(Set dst (MulReductionVD src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ unordered_reduce_fp(opcode, vlen, $dst$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw());
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// =======================Short Reduction==========================================

instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
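
// The distinction between the strict and unordered float/double reduction rules
// above: a reduction with requires_strict_order() must fold lanes left to
// right starting from the accumulated value, while the unordered_reduction*
// rules (used by the Vector API) may reassociate freely, which can change the
// final bits for floating point. Sketch (illustrative only):
//
//   // strict: sequential fold, seeded from the accumulator
//   float strict = acc;
//   for (int i = 0; i < vlen; i++) strict = strict + src[i];
//
//   // unordered: any association allowed, e.g. a pairwise tree,
//   // seeded with the identity passed in src1
//   for (int s = vlen / 2; s > 0; s /= 2)
//     for (int i = 0; i < s; i++) src[i] = src[i] + src[i + s];
//   float unordered = identity + src[0];
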
// =======================Mul Reduction==========================================

instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) <= 32); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE &&
            Matcher::vector_length(n->in(2)) == 64); // src2
  match(Set dst (MulReductionVI src1 src2));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min/Max Float Reduction --------------------
// Float Min Reduction
instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp,
                            legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp,
                           legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp,
                               legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr);
  format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp,
                              legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr);
  format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister,
                         $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

//--------------------Min Double Reduction --------------------
instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2,
                            legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                            rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD(legRegD dst, immD src1, legVec src2,
                           legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                           rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) ||
             (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reduction2D_av(legRegD dst, legVec src,
                               legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs
                               rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) == 2);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
  format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct minmax_reductionD_av(legRegD dst, legVec src,
                              legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs
                              rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE &&
            Matcher::vector_length(n->in(2)) >= 4);
  match(Set dst (MinReductionV dst src));
  match(Set dst (MaxReductionV dst src));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr);
  format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "sanity");

    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister,
                          $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
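
// The float/double min/max reductions above cannot simply fold with minps/maxps:
// Java's Math.min/max semantics must propagate NaN and order -0.0 below +0.0,
// which is why the masm helpers need the extra atmp/btmp temporaries and kill
// the condition flags. Scalar sketch of the per-lane rule being reduced
// (illustrative only):
//
//   double java_min(double a, double b) {
//     if (a != a) return a;                                      // NaN propagates
//     if (a == 0.0 && b == 0.0) return (1.0 / a < 0.0) ? a : b;  // -0.0 < +0.0
//     return (a < b) ? a : b;                                    // b is returned if b is NaN
//   }
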
add packedB" %} 5630 ins_encode %{ 5631 int vlen_enc = vector_length_encoding(this); 5632 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5633 %} 5634 ins_pipe( pipe_slow ); 5635 %} 5636 5637 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5638 predicate((UseAVX > 0) && 5639 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5640 match(Set dst (AddVB src (LoadVector mem))); 5641 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5642 ins_encode %{ 5643 int vlen_enc = vector_length_encoding(this); 5644 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5645 %} 5646 ins_pipe( pipe_slow ); 5647 %} 5648 5649 // Shorts/Chars vector add 5650 instruct vaddS(vec dst, vec src) %{ 5651 predicate(UseAVX == 0); 5652 match(Set dst (AddVS dst src)); 5653 format %{ "paddw $dst,$src\t! add packedS" %} 5654 ins_encode %{ 5655 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5656 %} 5657 ins_pipe( pipe_slow ); 5658 %} 5659 5660 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5661 predicate(UseAVX > 0); 5662 match(Set dst (AddVS src1 src2)); 5663 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5664 ins_encode %{ 5665 int vlen_enc = vector_length_encoding(this); 5666 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5667 %} 5668 ins_pipe( pipe_slow ); 5669 %} 5670 5671 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5672 predicate((UseAVX > 0) && 5673 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5674 match(Set dst (AddVS src (LoadVector mem))); 5675 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5676 ins_encode %{ 5677 int vlen_enc = vector_length_encoding(this); 5678 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5679 %} 5680 ins_pipe( pipe_slow ); 5681 %} 5682 5683 // Integers vector add 5684 instruct vaddI(vec dst, vec src) %{ 5685 predicate(UseAVX == 0); 5686 match(Set dst (AddVI dst src)); 5687 format %{ "paddd $dst,$src\t! add packedI" %} 5688 ins_encode %{ 5689 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5695 predicate(UseAVX > 0); 5696 match(Set dst (AddVI src1 src2)); 5697 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5698 ins_encode %{ 5699 int vlen_enc = vector_length_encoding(this); 5700 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 5706 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5707 predicate((UseAVX > 0) && 5708 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5709 match(Set dst (AddVI src (LoadVector mem))); 5710 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5711 ins_encode %{ 5712 int vlen_enc = vector_length_encoding(this); 5713 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5714 %} 5715 ins_pipe( pipe_slow ); 5716 %} 5717 5718 // Longs vector add 5719 instruct vaddL(vec dst, vec src) %{ 5720 predicate(UseAVX == 0); 5721 match(Set dst (AddVL dst src)); 5722 format %{ "paddq $dst,$src\t! add packedL" %} 5723 ins_encode %{ 5724 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5725 %} 5726 ins_pipe( pipe_slow ); 5727 %} 5728 5729 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5730 predicate(UseAVX > 0); 5731 match(Set dst (AddVL src1 src2)); 5732 format %{ "vpaddq $dst,$src1,$src2\t! 
add packedL" %} 5733 ins_encode %{ 5734 int vlen_enc = vector_length_encoding(this); 5735 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5736 %} 5737 ins_pipe( pipe_slow ); 5738 %} 5739 5740 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5741 predicate((UseAVX > 0) && 5742 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5743 match(Set dst (AddVL src (LoadVector mem))); 5744 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5745 ins_encode %{ 5746 int vlen_enc = vector_length_encoding(this); 5747 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5748 %} 5749 ins_pipe( pipe_slow ); 5750 %} 5751 5752 // Floats vector add 5753 instruct vaddF(vec dst, vec src) %{ 5754 predicate(UseAVX == 0); 5755 match(Set dst (AddVF dst src)); 5756 format %{ "addps $dst,$src\t! add packedF" %} 5757 ins_encode %{ 5758 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5759 %} 5760 ins_pipe( pipe_slow ); 5761 %} 5762 5763 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5764 predicate(UseAVX > 0); 5765 match(Set dst (AddVF src1 src2)); 5766 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5767 ins_encode %{ 5768 int vlen_enc = vector_length_encoding(this); 5769 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5770 %} 5771 ins_pipe( pipe_slow ); 5772 %} 5773 5774 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5775 predicate((UseAVX > 0) && 5776 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5777 match(Set dst (AddVF src (LoadVector mem))); 5778 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5779 ins_encode %{ 5780 int vlen_enc = vector_length_encoding(this); 5781 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5782 %} 5783 ins_pipe( pipe_slow ); 5784 %} 5785 5786 // Doubles vector add 5787 instruct vaddD(vec dst, vec src) %{ 5788 predicate(UseAVX == 0); 5789 match(Set dst (AddVD dst src)); 5790 format %{ "addpd $dst,$src\t! add packedD" %} 5791 ins_encode %{ 5792 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5793 %} 5794 ins_pipe( pipe_slow ); 5795 %} 5796 5797 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5798 predicate(UseAVX > 0); 5799 match(Set dst (AddVD src1 src2)); 5800 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5801 ins_encode %{ 5802 int vlen_enc = vector_length_encoding(this); 5803 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5804 %} 5805 ins_pipe( pipe_slow ); 5806 %} 5807 5808 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5809 predicate((UseAVX > 0) && 5810 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5811 match(Set dst (AddVD src (LoadVector mem))); 5812 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5813 ins_encode %{ 5814 int vlen_enc = vector_length_encoding(this); 5815 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5816 %} 5817 ins_pipe( pipe_slow ); 5818 %} 5819 5820 // --------------------------------- SUB -------------------------------------- 5821 5822 // Bytes vector sub 5823 instruct vsubB(vec dst, vec src) %{ 5824 predicate(UseAVX == 0); 5825 match(Set dst (SubVB dst src)); 5826 format %{ "psubb $dst,$src\t! sub packedB" %} 5827 ins_encode %{ 5828 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5829 %} 5830 ins_pipe( pipe_slow ); 5831 %} 5832 5833 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5834 predicate(UseAVX > 0); 5835 match(Set dst (SubVB src1 src2)); 5836 format %{ "vpsubb $dst,$src1,$src2\t! 
sub packedB" %} 5837 ins_encode %{ 5838 int vlen_enc = vector_length_encoding(this); 5839 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5840 %} 5841 ins_pipe( pipe_slow ); 5842 %} 5843 5844 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5845 predicate((UseAVX > 0) && 5846 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5847 match(Set dst (SubVB src (LoadVector mem))); 5848 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5849 ins_encode %{ 5850 int vlen_enc = vector_length_encoding(this); 5851 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5852 %} 5853 ins_pipe( pipe_slow ); 5854 %} 5855 5856 // Shorts/Chars vector sub 5857 instruct vsubS(vec dst, vec src) %{ 5858 predicate(UseAVX == 0); 5859 match(Set dst (SubVS dst src)); 5860 format %{ "psubw $dst,$src\t! sub packedS" %} 5861 ins_encode %{ 5862 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5863 %} 5864 ins_pipe( pipe_slow ); 5865 %} 5866 5867 5868 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5869 predicate(UseAVX > 0); 5870 match(Set dst (SubVS src1 src2)); 5871 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5872 ins_encode %{ 5873 int vlen_enc = vector_length_encoding(this); 5874 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5875 %} 5876 ins_pipe( pipe_slow ); 5877 %} 5878 5879 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5880 predicate((UseAVX > 0) && 5881 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5882 match(Set dst (SubVS src (LoadVector mem))); 5883 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5884 ins_encode %{ 5885 int vlen_enc = vector_length_encoding(this); 5886 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5887 %} 5888 ins_pipe( pipe_slow ); 5889 %} 5890 5891 // Integers vector sub 5892 instruct vsubI(vec dst, vec src) %{ 5893 predicate(UseAVX == 0); 5894 match(Set dst (SubVI dst src)); 5895 format %{ "psubd $dst,$src\t! sub packedI" %} 5896 ins_encode %{ 5897 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5898 %} 5899 ins_pipe( pipe_slow ); 5900 %} 5901 5902 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5903 predicate(UseAVX > 0); 5904 match(Set dst (SubVI src1 src2)); 5905 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5906 ins_encode %{ 5907 int vlen_enc = vector_length_encoding(this); 5908 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5909 %} 5910 ins_pipe( pipe_slow ); 5911 %} 5912 5913 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5914 predicate((UseAVX > 0) && 5915 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5916 match(Set dst (SubVI src (LoadVector mem))); 5917 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5918 ins_encode %{ 5919 int vlen_enc = vector_length_encoding(this); 5920 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5921 %} 5922 ins_pipe( pipe_slow ); 5923 %} 5924 5925 // Longs vector sub 5926 instruct vsubL(vec dst, vec src) %{ 5927 predicate(UseAVX == 0); 5928 match(Set dst (SubVL dst src)); 5929 format %{ "psubq $dst,$src\t! sub packedL" %} 5930 ins_encode %{ 5931 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5932 %} 5933 ins_pipe( pipe_slow ); 5934 %} 5935 5936 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5937 predicate(UseAVX > 0); 5938 match(Set dst (SubVL src1 src2)); 5939 format %{ "vpsubq $dst,$src1,$src2\t! 
sub packedL" %} 5940 ins_encode %{ 5941 int vlen_enc = vector_length_encoding(this); 5942 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5943 %} 5944 ins_pipe( pipe_slow ); 5945 %} 5946 5947 5948 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5949 predicate((UseAVX > 0) && 5950 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5951 match(Set dst (SubVL src (LoadVector mem))); 5952 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5953 ins_encode %{ 5954 int vlen_enc = vector_length_encoding(this); 5955 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5956 %} 5957 ins_pipe( pipe_slow ); 5958 %} 5959 5960 // Floats vector sub 5961 instruct vsubF(vec dst, vec src) %{ 5962 predicate(UseAVX == 0); 5963 match(Set dst (SubVF dst src)); 5964 format %{ "subps $dst,$src\t! sub packedF" %} 5965 ins_encode %{ 5966 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5967 %} 5968 ins_pipe( pipe_slow ); 5969 %} 5970 5971 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5972 predicate(UseAVX > 0); 5973 match(Set dst (SubVF src1 src2)); 5974 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5975 ins_encode %{ 5976 int vlen_enc = vector_length_encoding(this); 5977 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 5982 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5983 predicate((UseAVX > 0) && 5984 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5985 match(Set dst (SubVF src (LoadVector mem))); 5986 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5987 ins_encode %{ 5988 int vlen_enc = vector_length_encoding(this); 5989 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 // Doubles vector sub 5995 instruct vsubD(vec dst, vec src) %{ 5996 predicate(UseAVX == 0); 5997 match(Set dst (SubVD dst src)); 5998 format %{ "subpd $dst,$src\t! sub packedD" %} 5999 ins_encode %{ 6000 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 6001 %} 6002 ins_pipe( pipe_slow ); 6003 %} 6004 6005 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 6006 predicate(UseAVX > 0); 6007 match(Set dst (SubVD src1 src2)); 6008 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 6009 ins_encode %{ 6010 int vlen_enc = vector_length_encoding(this); 6011 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6012 %} 6013 ins_pipe( pipe_slow ); 6014 %} 6015 6016 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 6017 predicate((UseAVX > 0) && 6018 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6019 match(Set dst (SubVD src (LoadVector mem))); 6020 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 6021 ins_encode %{ 6022 int vlen_enc = vector_length_encoding(this); 6023 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6024 %} 6025 ins_pipe( pipe_slow ); 6026 %} 6027 6028 // --------------------------------- MUL -------------------------------------- 6029 6030 // Byte vector mul 6031 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 6032 predicate(Matcher::vector_length_in_bytes(n) <= 8); 6033 match(Set dst (MulVB src1 src2)); 6034 effect(TEMP dst, TEMP xtmp); 6035 format %{ "mulVB $dst, $src1, $src2\t! 
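using $xtmp as TEMP" %}
  // There is no 8-bit lane multiply instruction; the encodings below widen
  // bytes to 16-bit lanes, multiply, and keep only the low byte of each
  // product. A scalar sketch of the per-element effect (mul8 is an
  // illustrative name, not a HotSpot helper):
  //   int8_t mul8(int8_t a, int8_t b) {
  //     int16_t w = (int16_t)a * (int16_t)b;   // widened 16-bit product
  //     return (int8_t)w;                      // low 8 bits == product mod 256
  //   }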
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister);
    __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    __ psrlw($dst$$XMMRegister, 8);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    // Odd-index elements
    __ movdqu($dst$$XMMRegister, $src1$$XMMRegister);
    __ psrlw($dst$$XMMRegister, 8);
    __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psrlw($xtmp$$XMMRegister, 8);
    __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllw($dst$$XMMRegister, 8);
    // Even-index elements
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ psllw($xtmp$$XMMRegister, 8);
    __ psrlw($xtmp$$XMMRegister, 8);
    // Combine
    __ por($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8);
  match(Set dst (MulVB src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Odd-index elements
    __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc);
    __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc);
    // Even-index elements
    __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc);
    // Combine
    __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t!
mul packedS" %} 6122 ins_encode %{ 6123 int vlen_enc = vector_length_encoding(this); 6124 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6125 %} 6126 ins_pipe( pipe_slow ); 6127 %} 6128 6129 // Integers vector mul 6130 instruct vmulI(vec dst, vec src) %{ 6131 predicate(UseAVX == 0); 6132 match(Set dst (MulVI dst src)); 6133 format %{ "pmulld $dst,$src\t! mul packedI" %} 6134 ins_encode %{ 6135 assert(UseSSE > 3, "required"); 6136 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 6137 %} 6138 ins_pipe( pipe_slow ); 6139 %} 6140 6141 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 6142 predicate(UseAVX > 0); 6143 match(Set dst (MulVI src1 src2)); 6144 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 6145 ins_encode %{ 6146 int vlen_enc = vector_length_encoding(this); 6147 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6148 %} 6149 ins_pipe( pipe_slow ); 6150 %} 6151 6152 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 6153 predicate((UseAVX > 0) && 6154 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6155 match(Set dst (MulVI src (LoadVector mem))); 6156 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 6157 ins_encode %{ 6158 int vlen_enc = vector_length_encoding(this); 6159 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6160 %} 6161 ins_pipe( pipe_slow ); 6162 %} 6163 6164 // Longs vector mul 6165 instruct evmulL_reg(vec dst, vec src1, vec src2) %{ 6166 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6167 VM_Version::supports_avx512dq()) || 6168 VM_Version::supports_avx512vldq()); 6169 match(Set dst (MulVL src1 src2)); 6170 format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %} 6171 ins_encode %{ 6172 assert(UseAVX > 2, "required"); 6173 int vlen_enc = vector_length_encoding(this); 6174 __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6175 %} 6176 ins_pipe( pipe_slow ); 6177 %} 6178 6179 instruct evmulL_mem(vec dst, vec src, memory mem) %{ 6180 predicate((Matcher::vector_length_in_bytes(n) == 64 && 6181 VM_Version::supports_avx512dq()) || 6182 (Matcher::vector_length_in_bytes(n) > 8 && 6183 VM_Version::supports_avx512vldq())); 6184 match(Set dst (MulVL src (LoadVector mem))); 6185 format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %} 6186 ins_encode %{ 6187 assert(UseAVX > 2, "required"); 6188 int vlen_enc = vector_length_encoding(this); 6189 __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6190 %} 6191 ins_pipe( pipe_slow ); 6192 %} 6193 6194 instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{ 6195 predicate(UseAVX == 0); 6196 match(Set dst (MulVL src1 src2)); 6197 effect(TEMP dst, TEMP xtmp); 6198 format %{ "mulVL $dst, $src1, $src2\t! 
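using $xtmp as TEMP" %}
  // Without a packed 64-bit multiply, the product is assembled from 32-bit
  // pieces: the two cross products are summed (mod 2^32), shifted into the
  // high half, and added to the full lo*lo product. A scalar sketch of the
  // sequence below (mul64 is an illustrative name, not a HotSpot helper):
  //   uint64_t mul64(uint64_t a, uint64_t b) {
  //     uint32_t a_lo = (uint32_t)a, a_hi = (uint32_t)(a >> 32);
  //     uint32_t b_lo = (uint32_t)b, b_hi = (uint32_t)(b >> 32);
  //     uint32_t cross = a_lo * b_hi + a_hi * b_lo;    // low 32 bits suffice
  //     return (uint64_t)a_lo * b_lo + ((uint64_t)cross << 32);
  //   }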
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // Get the lo-hi products; only the lower 32 bits of each are of concern
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi products; only the lower 32 bits of each are of concern
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t!
mul packedD" %} 6288 ins_encode %{ 6289 int vlen_enc = vector_length_encoding(this); 6290 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6291 %} 6292 ins_pipe( pipe_slow ); 6293 %} 6294 6295 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 6296 predicate((UseAVX > 0) && 6297 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6298 match(Set dst (MulVD src (LoadVector mem))); 6299 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 6300 ins_encode %{ 6301 int vlen_enc = vector_length_encoding(this); 6302 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6303 %} 6304 ins_pipe( pipe_slow ); 6305 %} 6306 6307 // --------------------------------- DIV -------------------------------------- 6308 6309 // Floats vector div 6310 instruct vdivF(vec dst, vec src) %{ 6311 predicate(UseAVX == 0); 6312 match(Set dst (DivVF dst src)); 6313 format %{ "divps $dst,$src\t! div packedF" %} 6314 ins_encode %{ 6315 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6316 %} 6317 ins_pipe( pipe_slow ); 6318 %} 6319 6320 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6321 predicate(UseAVX > 0); 6322 match(Set dst (DivVF src1 src2)); 6323 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6324 ins_encode %{ 6325 int vlen_enc = vector_length_encoding(this); 6326 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6327 %} 6328 ins_pipe( pipe_slow ); 6329 %} 6330 6331 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6332 predicate((UseAVX > 0) && 6333 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6334 match(Set dst (DivVF src (LoadVector mem))); 6335 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6336 ins_encode %{ 6337 int vlen_enc = vector_length_encoding(this); 6338 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6339 %} 6340 ins_pipe( pipe_slow ); 6341 %} 6342 6343 // Doubles vector div 6344 instruct vdivD(vec dst, vec src) %{ 6345 predicate(UseAVX == 0); 6346 match(Set dst (DivVD dst src)); 6347 format %{ "divpd $dst,$src\t! div packedD" %} 6348 ins_encode %{ 6349 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6350 %} 6351 ins_pipe( pipe_slow ); 6352 %} 6353 6354 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6355 predicate(UseAVX > 0); 6356 match(Set dst (DivVD src1 src2)); 6357 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6358 ins_encode %{ 6359 int vlen_enc = vector_length_encoding(this); 6360 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6361 %} 6362 ins_pipe( pipe_slow ); 6363 %} 6364 6365 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6366 predicate((UseAVX > 0) && 6367 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6368 match(Set dst (DivVD src (LoadVector mem))); 6369 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 6370 ins_encode %{ 6371 int vlen_enc = vector_length_encoding(this); 6372 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6373 %} 6374 ins_pipe( pipe_slow ); 6375 %} 6376 6377 // ------------------------------ MinMax --------------------------------------- 6378 6379 // Byte, Short, Int vector Min/Max 6380 instruct minmax_reg_sse(vec dst, vec src) %{ 6381 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6382 UseAVX == 0); 6383 match(Set dst (MinV dst src)); 6384 match(Set dst (MaxV dst src)); 6385 format %{ "vector_minmax $dst,$src\t! 
" %} 6386 ins_encode %{ 6387 assert(UseSSE >= 4, "required"); 6388 6389 int opcode = this->ideal_Opcode(); 6390 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6391 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6392 %} 6393 ins_pipe( pipe_slow ); 6394 %} 6395 6396 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6397 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6398 UseAVX > 0); 6399 match(Set dst (MinV src1 src2)); 6400 match(Set dst (MaxV src1 src2)); 6401 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6402 ins_encode %{ 6403 int opcode = this->ideal_Opcode(); 6404 int vlen_enc = vector_length_encoding(this); 6405 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6406 6407 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6408 %} 6409 ins_pipe( pipe_slow ); 6410 %} 6411 6412 // Long vector Min/Max 6413 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6414 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6415 UseAVX == 0); 6416 match(Set dst (MinV dst src)); 6417 match(Set dst (MaxV src dst)); 6418 effect(TEMP dst, TEMP tmp); 6419 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6420 ins_encode %{ 6421 assert(UseSSE >= 4, "required"); 6422 6423 int opcode = this->ideal_Opcode(); 6424 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6425 assert(elem_bt == T_LONG, "sanity"); 6426 6427 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6428 %} 6429 ins_pipe( pipe_slow ); 6430 %} 6431 6432 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6433 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6434 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6435 match(Set dst (MinV src1 src2)); 6436 match(Set dst (MaxV src1 src2)); 6437 effect(TEMP dst); 6438 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6439 ins_encode %{ 6440 int vlen_enc = vector_length_encoding(this); 6441 int opcode = this->ideal_Opcode(); 6442 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6443 assert(elem_bt == T_LONG, "sanity"); 6444 6445 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6446 %} 6447 ins_pipe( pipe_slow ); 6448 %} 6449 6450 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6451 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6452 Matcher::vector_element_basic_type(n) == T_LONG); 6453 match(Set dst (MinV src1 src2)); 6454 match(Set dst (MaxV src1 src2)); 6455 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6456 ins_encode %{ 6457 assert(UseAVX > 2, "required"); 6458 6459 int vlen_enc = vector_length_encoding(this); 6460 int opcode = this->ideal_Opcode(); 6461 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6462 assert(elem_bt == T_LONG, "sanity"); 6463 6464 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6465 %} 6466 ins_pipe( pipe_slow ); 6467 %} 6468 6469 // Float/Double vector Min/Max 6470 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6471 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6472 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6473 UseAVX > 0); 6474 match(Set dst (MinV a b)); 6475 match(Set dst (MaxV a b)); 6476 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6477 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6478 ins_encode %{ 6479 assert(UseAVX > 0, "required"); 6480 6481 int opcode = this->ideal_Opcode(); 6482 int vlen_enc = vector_length_encoding(this); 6483 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6484 6485 __ vminmax_fp(opcode, elem_bt, 6486 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6487 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6488 %} 6489 ins_pipe( pipe_slow ); 6490 %} 6491 6492 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6493 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6494 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6495 match(Set dst (MinV a b)); 6496 match(Set dst (MaxV a b)); 6497 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6498 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6499 ins_encode %{ 6500 assert(UseAVX > 2, "required"); 6501 6502 int opcode = this->ideal_Opcode(); 6503 int vlen_enc = vector_length_encoding(this); 6504 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6505 6506 __ evminmax_fp(opcode, elem_bt, 6507 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6508 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6509 %} 6510 ins_pipe( pipe_slow ); 6511 %} 6512 6513 // --------------------------------- Signum/CopySign --------------------------- 6514 6515 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6516 match(Set dst (SignumF dst (Binary zero one))); 6517 effect(KILL cr); 6518 format %{ "signumF $dst, $dst" %} 6519 ins_encode %{ 6520 int opcode = this->ideal_Opcode(); 6521 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6522 %} 6523 ins_pipe( pipe_slow ); 6524 %} 6525 6526 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6527 match(Set dst (SignumD dst (Binary zero one))); 6528 effect(KILL cr); 6529 format %{ "signumD $dst, $dst" %} 6530 ins_encode %{ 6531 int opcode = this->ideal_Opcode(); 6532 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6533 %} 6534 ins_pipe( pipe_slow ); 6535 %} 6536 6537 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6538 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6539 match(Set dst (SignumVF src (Binary zero one))); 6540 match(Set dst (SignumVD src (Binary zero one))); 6541 effect(TEMP dst, TEMP xtmp1); 6542 format %{ "vector_signum_avx $dst, $src\t! 
instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP xtmp1);
  format %{ "vector_signum_avx $dst, $src\t! using $xtmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                         $xtmp1$$XMMRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (SignumVF src (Binary zero one)));
  match(Set dst (SignumVD src (Binary zero one)));
  effect(TEMP dst, TEMP ktmp1);
  format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vec_enc = vector_length_encoding(this);
    __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister,
                          $ktmp1$$KRegister, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------
// For copySign use 0xE4 as the truth-table immediate (imm8) for vpternlog
// Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit
// C (xmm2) is set to 0x7FFFFFFF
// Wherever xmm2 is 0, we want to pick from B (sign)
// Wherever xmm2 is 1, we want to pick from A (magnitude)
//
// A B C  Result
// 0 0 0  0
// 0 0 1  0
// 0 1 0  1
// 0 1 1  0
// 1 0 0  0
// 1 0 1  1
// 1 1 0  1
// 1 1 1  1
//
// Result going from high bit to low bit is 0b11100100 = 0xe4
// (in general, bit (A<<2 | B<<1 | C) of the immediate holds the result for
// that input row)
// ---------------------------------------

#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

//----------------------------- CompressBits/ExpandBits ------------------------

instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (CompressBits src mask));
  format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %}
  ins_encode %{
    __ pextl($dst$$Register, $src$$Register, $mask$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{
  predicate(n->bottom_type()->isa_int());
  match(Set dst (ExpandBits src mask));
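  // pext/pdep are the BMI2 parallel bit extract/deposit instructions. A
  // scalar sketch of the pext semantics (pdep is the inverse; pext32 is an
  // illustrative name, not a HotSpot helper):
  //   uint32_t pext32(uint32_t src, uint32_t mask) {
  //     uint32_t res = 0;
  //     for (uint32_t bit = 1; mask != 0; mask &= mask - 1, bit <<= 1) {
  //       if (src & mask & -mask) res |= bit;  // src bit at lowest set mask bit
  //     }
  //     return res;
  //   }
  format %{ "pdepl $dst, $src, $mask\t!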
parallel bit deposit" %} 6631 ins_encode %{ 6632 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6633 %} 6634 ins_pipe( pipe_slow ); 6635 %} 6636 6637 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6638 predicate(n->bottom_type()->isa_int()); 6639 match(Set dst (CompressBits src (LoadI mask))); 6640 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6641 ins_encode %{ 6642 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6643 %} 6644 ins_pipe( pipe_slow ); 6645 %} 6646 6647 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6648 predicate(n->bottom_type()->isa_int()); 6649 match(Set dst (ExpandBits src (LoadI mask))); 6650 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6651 ins_encode %{ 6652 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6653 %} 6654 ins_pipe( pipe_slow ); 6655 %} 6656 6657 // --------------------------------- Sqrt -------------------------------------- 6658 6659 instruct vsqrtF_reg(vec dst, vec src) %{ 6660 match(Set dst (SqrtVF src)); 6661 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6662 ins_encode %{ 6663 assert(UseAVX > 0, "required"); 6664 int vlen_enc = vector_length_encoding(this); 6665 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6666 %} 6667 ins_pipe( pipe_slow ); 6668 %} 6669 6670 instruct vsqrtF_mem(vec dst, memory mem) %{ 6671 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6672 match(Set dst (SqrtVF (LoadVector mem))); 6673 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6674 ins_encode %{ 6675 assert(UseAVX > 0, "required"); 6676 int vlen_enc = vector_length_encoding(this); 6677 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6678 %} 6679 ins_pipe( pipe_slow ); 6680 %} 6681 6682 // Floating point vector sqrt 6683 instruct vsqrtD_reg(vec dst, vec src) %{ 6684 match(Set dst (SqrtVD src)); 6685 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6686 ins_encode %{ 6687 assert(UseAVX > 0, "required"); 6688 int vlen_enc = vector_length_encoding(this); 6689 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6690 %} 6691 ins_pipe( pipe_slow ); 6692 %} 6693 6694 instruct vsqrtD_mem(vec dst, memory mem) %{ 6695 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6696 match(Set dst (SqrtVD (LoadVector mem))); 6697 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6698 ins_encode %{ 6699 assert(UseAVX > 0, "required"); 6700 int vlen_enc = vector_length_encoding(this); 6701 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6702 %} 6703 ins_pipe( pipe_slow ); 6704 %} 6705 6706 // ------------------------------ Shift --------------------------------------- 6707 6708 // Left and right shift count vectors are the same on x86 6709 // (only lowest bits of xmm reg are used for count). 6710 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6711 match(Set dst (LShiftCntV cnt)); 6712 match(Set dst (RShiftCntV cnt)); 6713 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6714 ins_encode %{ 6715 __ movdl($dst$$XMMRegister, $cnt$$Register); 6716 %} 6717 ins_pipe( pipe_slow ); 6718 %} 6719 6720 // Byte vector shift 6721 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6722 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6723 match(Set dst ( LShiftVB src shift)); 6724 match(Set dst ( RShiftVB src shift)); 6725 match(Set dst (URShiftVB src shift)); 6726 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6727 format %{"vector_byte_shift $dst,$src,$shift" %} 6728 ins_encode %{ 6729 assert(UseSSE > 3, "required"); 6730 int opcode = this->ideal_Opcode(); 6731 bool sign = (opcode != Op_URShiftVB); 6732 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6733 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6734 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6735 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6736 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6737 %} 6738 ins_pipe( pipe_slow ); 6739 %} 6740 6741 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6742 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6743 UseAVX <= 1); 6744 match(Set dst ( LShiftVB src shift)); 6745 match(Set dst ( RShiftVB src shift)); 6746 match(Set dst (URShiftVB src shift)); 6747 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6748 format %{"vector_byte_shift $dst,$src,$shift" %} 6749 ins_encode %{ 6750 assert(UseSSE > 3, "required"); 6751 int opcode = this->ideal_Opcode(); 6752 bool sign = (opcode != Op_URShiftVB); 6753 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6754 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6755 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6756 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6757 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6758 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6759 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6760 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6761 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6762 %} 6763 ins_pipe( pipe_slow ); 6764 %} 6765 6766 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6767 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6768 UseAVX > 1); 6769 match(Set dst ( LShiftVB src shift)); 6770 match(Set dst ( RShiftVB src shift)); 6771 match(Set dst (URShiftVB src shift)); 6772 effect(TEMP dst, TEMP tmp); 6773 format %{"vector_byte_shift $dst,$src,$shift" %} 6774 ins_encode %{ 6775 int opcode = this->ideal_Opcode(); 6776 bool sign = (opcode != Op_URShiftVB); 6777 int vlen_enc = Assembler::AVX_256bit; 6778 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6779 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6780 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6781 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6782 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6783 %} 6784 ins_pipe( pipe_slow ); 6785 %} 6786 6787 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6788 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6789 match(Set dst ( LShiftVB src shift)); 6790 match(Set dst ( RShiftVB src shift)); 6791 match(Set dst (URShiftVB src shift)); 6792 effect(TEMP 
dst, TEMP tmp);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2);
  format %{ "vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result for
// negative data, because Java code converts a short value into an int with
// sign extension before a shift. For example, (short)((short)-1 >>> 1) is -1
// in Java (-1 widens to 0xFFFFFFFF, shifts to 0x7FFFFFFF, and narrows back
// to 0xFFFF), whereas a 16-bit lane shift would produce 0x7FFF. Char vectors
// are fine, since chars are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t!
shift packedS" %} 6852 ins_encode %{ 6853 int opcode = this->ideal_Opcode(); 6854 if (UseAVX > 0) { 6855 int vlen_enc = vector_length_encoding(this); 6856 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6857 } else { 6858 int vlen = Matcher::vector_length(this); 6859 if (vlen == 2) { 6860 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6861 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6862 } else if (vlen == 4) { 6863 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6864 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6865 } else { 6866 assert (vlen == 8, "sanity"); 6867 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6868 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6869 } 6870 } 6871 %} 6872 ins_pipe( pipe_slow ); 6873 %} 6874 6875 // Integers vector left shift 6876 instruct vshiftI(vec dst, vec src, vec shift) %{ 6877 predicate(!n->as_ShiftV()->is_var_shift()); 6878 match(Set dst ( LShiftVI src shift)); 6879 match(Set dst ( RShiftVI src shift)); 6880 match(Set dst (URShiftVI src shift)); 6881 effect(TEMP dst, USE src, USE shift); 6882 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6883 ins_encode %{ 6884 int opcode = this->ideal_Opcode(); 6885 if (UseAVX > 0) { 6886 int vlen_enc = vector_length_encoding(this); 6887 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6888 } else { 6889 int vlen = Matcher::vector_length(this); 6890 if (vlen == 2) { 6891 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6892 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6893 } else { 6894 assert(vlen == 4, "sanity"); 6895 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6896 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6897 } 6898 } 6899 %} 6900 ins_pipe( pipe_slow ); 6901 %} 6902 6903 // Integers vector left constant shift 6904 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6905 match(Set dst (LShiftVI src (LShiftCntV shift))); 6906 match(Set dst (RShiftVI src (RShiftCntV shift))); 6907 match(Set dst (URShiftVI src (RShiftCntV shift))); 6908 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6909 ins_encode %{ 6910 int opcode = this->ideal_Opcode(); 6911 if (UseAVX > 0) { 6912 int vector_len = vector_length_encoding(this); 6913 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6914 } else { 6915 int vlen = Matcher::vector_length(this); 6916 if (vlen == 2) { 6917 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6918 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6919 } else { 6920 assert(vlen == 4, "sanity"); 6921 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6922 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6923 } 6924 } 6925 %} 6926 ins_pipe( pipe_slow ); 6927 %} 6928 6929 // Longs vector shift 6930 instruct vshiftL(vec dst, vec src, vec shift) %{ 6931 predicate(!n->as_ShiftV()->is_var_shift()); 6932 match(Set dst ( LShiftVL src shift)); 6933 match(Set dst (URShiftVL src shift)); 6934 effect(TEMP dst, USE src, USE shift); 6935 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector constant shift
instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVL src (LShiftCntV shift)));
  match(Set dst (URShiftVL src (RShiftCntV shift)));
  format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len);
    } else {
      assert(Matcher::vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    // No packed arithmetic right shift for longs before AVX-512; compute
    // x >> n as ((x >>> n) ^ m) - m, where m = (0x8000000000000000 >>> n)
    // re-sign-extends the logically shifted value.
    uint vlen = Matcher::vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vlen_enc = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------- Variable Shift -----------------------------
// Byte variable shift
instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
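  // x86 has no per-lane variable byte shift; this pattern and the wider
  // variants below widen each byte, apply a per-lane variable shift at a
  // wider width, and narrow back. A scalar sketch of the per-element effect
  // for the unsigned flavor (urshift8 is an illustrative name, not a
  // HotSpot helper):
  //   uint8_t urshift8(uint8_t x, uint8_t s) {
  //     uint16_t w = x;              // zero-extend into a wider lane
  //     return (uint8_t)(w >> s);    // shift, keep the low byte
  //   }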
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Shift lower half and get word result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);

    // Shift upper half and get word result in vtmp1
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);

    // Merge and down convert the two word results to byte in dst
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{
  predicate(Matcher::vector_length(n) == 32 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_128bit;
    // Process lower 128 bits and get result in dst
    __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Process higher 128 bits and get result in vtmp3
    __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister);
    __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0);
    __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0);
    __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0);

    // Merge the two results in dst
    __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 32 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 64 &&
            n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = Assembler::AVX_256bit;
    __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister);
    __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister);
    __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister);
    __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister);
    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1);
  %}
  ins_pipe( pipe_slow );
%}

// Short variable shift
instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 8 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Widen shorts to ints, shift as ints, then narrow the result back to shorts.
    __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
    __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister);
    __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n) == 16 &&
            n->as_ShiftV()->is_var_shift() &&
            !VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_varshift_short $dst, $src, $shift\n\t" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVS);
    int vlen_enc = Assembler::AVX_256bit;
    // Shift lower half, with result in vtmp2 using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift arithmetic
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc,
                 $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- VectorCast --------------------------------------

instruct vcastBtoX(vec dst, vec src) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_element_basic_type(n) != T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastBtoD(legVec dst, legVec src) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_element_basic_type(n) == T_DOUBLE);
  match(Set dst (VectorCastB2X src));
  format %{ "vector_cast_b2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vconvert_b2x(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castStoX(vec dst, vec src) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) <= 8 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX(vec dst, vec src, vec vtmp) %{
  predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) &&
            Matcher::vector_length(n->in(1)) == 16 && // src
            Matcher::vector_element_basic_type(n) == T_BYTE);
  effect(TEMP dst, TEMP vtmp);
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src));
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastStoX_evex(vec dst, vec src) %{
  predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastS2X src));
  format %{ "vector_cast_s2x $dst,$src\t!" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int vlen_enc = vector_length_encoding(this);
    switch (to_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_INT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_FLOAT:
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
        break;
      case T_DOUBLE: {
        int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit;
        __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc);
        __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
        break;
      }
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct castItoX(vec dst, vec src) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) <= 16) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX(vec dst, vec src, vec vtmp) %{
  predicate(UseAVX <= 2 &&
            (Matcher::vector_length_in_bytes(n->in(1)) == 32) &&
            (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t! using $vtmp as TEMP" %}
  effect(TEMP dst, TEMP vtmp);
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this, $src);

    if (to_elem_bt == T_BYTE) {
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt));
      __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg);
      __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1);
      __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastItoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src
  match(Set dst (VectorCastI2X src));
  format %{ "vector_cast_i2x $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType dst_elem_bt = Matcher::vector_element_basic_type(this);
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    switch (dst_elem_bt) {
      case T_BYTE:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_SHORT:
        if (!VM_Version::supports_avx512vl()) {
          src_vlen_enc = Assembler::AVX_512bit;
        }
        __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
        break;
      case T_FLOAT:
        __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_LONG:
        __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      case T_DOUBLE:
        __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc);
        break;
      default:
        ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoBS(vec dst, vec src) %{
  predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) &&
            UseAVX <= 2);
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    int vlen = Matcher::vector_length_in_bytes(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask())
                                                      : ExternalAddress(vector_int_to_short_mask());
    if (vlen <= 16) {
      __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    } else {
      assert(vlen <= 32, "required");
      __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit);
      __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg);
      __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
    if (to_elem_bt == T_BYTE) {
      __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastLtoX_evex(vec dst, vec src) %{
  predicate(UseAVX > 2 ||
            (Matcher::vector_element_basic_type(n) == T_INT ||
             Matcher::vector_element_basic_type(n) == T_FLOAT ||
             Matcher::vector_element_basic_type(n) == T_DOUBLE));
  match(Set dst (VectorCastL2X src));
  format %{ "vector_cast_l2x $dst,$src\t!" %}
%} 7616 ins_encode %{ 7617 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7618 int vlen = Matcher::vector_length_in_bytes(this, $src); 7619 int vlen_enc = vector_length_encoding(this, $src); 7620 switch (to_elem_bt) { 7621 case T_BYTE: 7622 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7623 vlen_enc = Assembler::AVX_512bit; 7624 } 7625 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7626 break; 7627 case T_SHORT: 7628 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7629 vlen_enc = Assembler::AVX_512bit; 7630 } 7631 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7632 break; 7633 case T_INT: 7634 if (vlen == 8) { 7635 if ($dst$$XMMRegister != $src$$XMMRegister) { 7636 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7637 } 7638 } else if (vlen == 16) { 7639 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7640 } else if (vlen == 32) { 7641 if (UseAVX > 2) { 7642 if (!VM_Version::supports_avx512vl()) { 7643 vlen_enc = Assembler::AVX_512bit; 7644 } 7645 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7646 } else { 7647 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7648 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7649 } 7650 } else { // vlen == 64 7651 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7652 } 7653 break; 7654 case T_FLOAT: 7655 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7656 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7657 break; 7658 case T_DOUBLE: 7659 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7660 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7661 break; 7662 7663 default: assert(false, "%s", type2name(to_elem_bt)); 7664 } 7665 %} 7666 ins_pipe( pipe_slow ); 7667 %} 7668 7669 instruct vcastFtoD_reg(vec dst, vec src) %{ 7670 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7671 match(Set dst (VectorCastF2X src)); 7672 format %{ "vector_cast_f2d $dst,$src\t!" %} 7673 ins_encode %{ 7674 int vlen_enc = vector_length_encoding(this); 7675 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7676 %} 7677 ins_pipe( pipe_slow ); 7678 %} 7679 7680 7681 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7682 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7683 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7684 match(Set dst (VectorCastF2X src)); 7685 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7686 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7687 ins_encode %{ 7688 int vlen_enc = vector_length_encoding(this, $src); 7689 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7690 // JDK-8292878 removed the need for an explicit scratch register needed to load greater than 7691 // 32 bit addresses for register indirect addressing mode since stub constants 7692 // are part of code cache and there is a cap of 2G on ReservedCodeCacheSize currently. 7693 // However, targets are free to increase this limit, but having a large code cache size 7694 // greater than 2G looks unreasonable in practical scenario, on the hind side with given 7695 // cap we save a temporary register allocation which in limiting case can prevent 7696 // spilling in high register pressure blocks. 
    __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastF2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    if (to_elem_bt == T_LONG) {
      int vlen_enc = vector_length_encoding(this);
      __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_double_signflip()), noreg, vlen_enc);
    } else {
      int vlen_enc = vector_length_encoding(this, $src);
      __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister,
                             ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcastDtoF_reg(vec dst, vec src) %{
  predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
  match(Set dst (VectorCastD2X src));
  format %{ "vector_cast_d2x $dst,$src\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister,
                          ExternalAddress(vector_float_signflip()), noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorCastD2X src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip())
                                                              : ExternalAddress(vector_float_signflip());
    __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                           $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vucast(vec dst, vec src) %{
  match(Set dst (VectorUCastB2X src));
  match(Set dst (VectorUCastS2X src));
  match(Set dst (VectorUCastI2X src));
  format %{ "vector_ucast $dst,$src\t!" %}
  ins_encode %{
    assert(UseAVX > 0, "required");

    BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src);
    BasicType to_elem_bt = Matcher::vector_element_basic_type(this);
    int vlen_enc = vector_length_encoding(this);
    __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{
  predicate(!VM_Version::supports_avx512vl() &&
            Matcher::vector_length_in_bytes(n) < 64 &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister,
                              ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                              $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate((VM_Version::supports_avx512vl() ||
             Matcher::vector_length_in_bytes(n) == 64) &&
            Matcher::vector_element_basic_type(n) == T_INT);
  match(Set dst (RoundVF src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister,
                               ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc,
                               $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

#endif // _LP64

// --------------------------------- VectorMaskCmp --------------------------------------

instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
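    // Compare into ktmp first, then materialize the k-mask as a boolean vector:
    // the masked load from the all-bits-set constant below writes -1 into lanes
    // where the compare succeeded and, with merge disabled, zeroes the rest.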
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // Flip the sign bit of both operands; an unsigned compare then reduces to
    // the corresponding signed compare.
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate((n->bottom_type()->isa_vectmask() == nullptr &&
             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
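    // Same materialization as the float/double case above: compare into ktmp,
    // then expand the k-mask into all-ones/zero lanes with a masked load of the
    // all-bits-set constant.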
    bool merge = false;
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    switch (src1_elem_bt) {
      case T_INT: {
        __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      case T_LONG: {
        __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is performed directly into the destination mask register;
    // dispatch on the source element type.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) <= 4);
  match(Set dst (ExtractF src idx));
  effect(TEMP dst, TEMP vtmp);
  format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 ||
            Matcher::vector_length(n->in(1)/*src*/) == 16);
  match(Set dst (ExtractF src idx));
  effect(TEMP vtmp);
  format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct extractD(legRegD dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) == 2); // src
  match(Set dst (ExtractD src idx));
  format %{ "extractD $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractD src idx));
  effect(TEMP vtmp);
  format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Blend --------------------------------------

instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    // Blend without vpblendvb: dst = (src1 & ~mask) | (src2 & mask), which is
    // cheaper than the blend instruction on E-core based targets.
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $ktmp as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
  predicate(n->in(2)->bottom_type()->isa_vectmask() &&
            (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
             VM_Version::supports_avx512bw()));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

instruct vabsnegF(vec dst, vec src) %{
  predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
this->ideal_Opcode(); 8329 int vlen = Matcher::vector_length(this); 8330 if (vlen == 2) { 8331 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8332 } else { 8333 assert(vlen == 8 || vlen == 16, "required"); 8334 int vlen_enc = vector_length_encoding(this); 8335 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8336 } 8337 %} 8338 ins_pipe( pipe_slow ); 8339 %} 8340 8341 instruct vabsneg4F(vec dst) %{ 8342 predicate(Matcher::vector_length(n) == 4); 8343 match(Set dst (AbsVF dst)); 8344 match(Set dst (NegVF dst)); 8345 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8346 ins_cost(150); 8347 ins_encode %{ 8348 int opcode = this->ideal_Opcode(); 8349 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8350 %} 8351 ins_pipe( pipe_slow ); 8352 %} 8353 8354 instruct vabsnegD(vec dst, vec src) %{ 8355 match(Set dst (AbsVD src)); 8356 match(Set dst (NegVD src)); 8357 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8358 ins_encode %{ 8359 int opcode = this->ideal_Opcode(); 8360 uint vlen = Matcher::vector_length(this); 8361 if (vlen == 2) { 8362 assert(UseSSE >= 2, "required"); 8363 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8364 } else { 8365 int vlen_enc = vector_length_encoding(this); 8366 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8367 } 8368 %} 8369 ins_pipe( pipe_slow ); 8370 %} 8371 8372 //------------------------------------- VectorTest -------------------------------------------- 8373 8374 #ifdef _LP64 8375 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8376 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8377 match(Set cr (VectorTest src1 src2)); 8378 effect(TEMP vtmp); 8379 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8380 ins_encode %{ 8381 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8382 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8383 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8384 %} 8385 ins_pipe( pipe_slow ); 8386 %} 8387 8388 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8389 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8390 match(Set cr (VectorTest src1 src2)); 8391 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8392 ins_encode %{ 8393 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8394 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8395 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8396 %} 8397 ins_pipe( pipe_slow ); 8398 %} 8399 8400 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8401 predicate((Matcher::vector_length(n->in(1)) < 8 || 8402 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8403 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8404 match(Set cr (VectorTest src1 src2)); 8405 effect(TEMP tmp); 8406 format %{ "ktest_alltrue_le8 $src1, $src2\t! 
using $tmp as TEMP" %} 8407 ins_encode %{ 8408 uint masklen = Matcher::vector_length(this, $src1); 8409 __ kmovwl($tmp$$Register, $src1$$KRegister); 8410 __ andl($tmp$$Register, (1 << masklen) - 1); 8411 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8412 %} 8413 ins_pipe( pipe_slow ); 8414 %} 8415 8416 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8417 predicate((Matcher::vector_length(n->in(1)) < 8 || 8418 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8419 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8420 match(Set cr (VectorTest src1 src2)); 8421 effect(TEMP tmp); 8422 format %{ "ktest_anytrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8423 ins_encode %{ 8424 uint masklen = Matcher::vector_length(this, $src1); 8425 __ kmovwl($tmp$$Register, $src1$$KRegister); 8426 __ andl($tmp$$Register, (1 << masklen) - 1); 8427 %} 8428 ins_pipe( pipe_slow ); 8429 %} 8430 8431 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8432 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8433 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8434 match(Set cr (VectorTest src1 src2)); 8435 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8436 ins_encode %{ 8437 uint masklen = Matcher::vector_length(this, $src1); 8438 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8439 %} 8440 ins_pipe( pipe_slow ); 8441 %} 8442 #endif 8443 8444 //------------------------------------- LoadMask -------------------------------------------- 8445 8446 instruct loadMask(legVec dst, legVec src) %{ 8447 predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw()); 8448 match(Set dst (VectorLoadMask src)); 8449 effect(TEMP dst); 8450 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8451 ins_encode %{ 8452 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8453 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8454 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8455 %} 8456 ins_pipe( pipe_slow ); 8457 %} 8458 8459 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8460 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8461 match(Set dst (VectorLoadMask src)); 8462 effect(TEMP xtmp); 8463 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8464 ins_encode %{ 8465 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8466 true, Assembler::AVX_512bit); 8467 %} 8468 ins_pipe( pipe_slow ); 8469 %} 8470 8471 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8472 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8473 match(Set dst (VectorLoadMask src)); 8474 effect(TEMP xtmp); 8475 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8476 ins_encode %{ 8477 int vlen_enc = vector_length_encoding(in(1)); 8478 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8479 false, vlen_enc); 8480 %} 8481 ins_pipe( pipe_slow ); 8482 %} 8483 8484 //------------------------------------- StoreMask -------------------------------------------- 8485 8486 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8487 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8488 match(Set dst (VectorStoreMask src size)); 8489 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8490 ins_encode %{ 8491 int vlen = Matcher::vector_length(this); 8492 if (vlen <= 16 && UseAVX <= 2) { 8493 assert(UseSSE >= 3, "required"); 8494 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8495 } else { 8496 assert(UseAVX > 0, "required"); 8497 int src_vlen_enc = vector_length_encoding(this, $src); 8498 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8499 } 8500 %} 8501 ins_pipe( pipe_slow ); 8502 %} 8503 8504 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8505 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8506 match(Set dst (VectorStoreMask src size)); 8507 effect(TEMP_DEF dst, TEMP xtmp); 8508 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8509 ins_encode %{ 8510 int vlen_enc = Assembler::AVX_128bit; 8511 int vlen = Matcher::vector_length(this); 8512 if (vlen <= 8) { 8513 assert(UseSSE >= 3, "required"); 8514 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8515 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8516 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8517 } else { 8518 assert(UseAVX > 0, "required"); 8519 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8520 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8521 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8522 } 8523 %} 8524 ins_pipe( pipe_slow ); 8525 %} 8526 8527 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8528 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8529 match(Set dst (VectorStoreMask src size)); 8530 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8531 effect(TEMP_DEF dst, TEMP xtmp); 8532 ins_encode %{ 8533 int vlen_enc = Assembler::AVX_128bit; 8534 int vlen = Matcher::vector_length(this); 8535 if (vlen <= 4) { 8536 assert(UseSSE >= 3, "required"); 8537 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8538 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8539 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8540 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8541 } else { 8542 assert(UseAVX > 0, "required"); 8543 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8544 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8545 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8546 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8547 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8548 } 8549 %} 8550 ins_pipe( pipe_slow ); 8551 %} 8552 8553 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8554 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8555 match(Set dst (VectorStoreMask src size)); 8556 effect(TEMP_DEF dst, TEMP xtmp); 8557 format %{ "vector_store_mask $dst, $src \t! 
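// Aside: an 8-byte lane has to be narrowed 8:1 before the abs step. The
// SSE sequence below does it in stages; tracing one true lane
// (illustrative values):
//   pshufd  0xFFFFFFFF_FFFFFFFF -> 0xFFFFFFFF   (keep each qword's low dword)
//   pabsd   0xFFFFFFFF          -> 0x00000001
//   packusdw / packuswb         -> 0x01         (dword -> word -> byte)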
elem size is $size byte[s]" %} 8558 ins_encode %{ 8559 assert(UseSSE >= 3, "required"); 8560 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8561 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8562 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8563 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8564 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8565 %} 8566 ins_pipe( pipe_slow ); 8567 %} 8568 8569 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8570 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8571 match(Set dst (VectorStoreMask src size)); 8572 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %} 8573 effect(TEMP_DEF dst, TEMP vtmp); 8574 ins_encode %{ 8575 int vlen_enc = Assembler::AVX_128bit; 8576 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8577 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8578 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8579 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8580 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8581 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8582 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8583 %} 8584 ins_pipe( pipe_slow ); 8585 %} 8586 8587 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8588 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8589 match(Set dst (VectorStoreMask src size)); 8590 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8591 ins_encode %{ 8592 int src_vlen_enc = vector_length_encoding(this, $src); 8593 int dst_vlen_enc = vector_length_encoding(this); 8594 if (!VM_Version::supports_avx512vl()) { 8595 src_vlen_enc = Assembler::AVX_512bit; 8596 } 8597 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8598 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8599 %} 8600 ins_pipe( pipe_slow ); 8601 %} 8602 8603 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8604 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr); 8605 match(Set dst (VectorStoreMask src size)); 8606 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8607 ins_encode %{ 8608 int src_vlen_enc = vector_length_encoding(this, $src); 8609 int dst_vlen_enc = vector_length_encoding(this); 8610 if (!VM_Version::supports_avx512vl()) { 8611 src_vlen_enc = Assembler::AVX_512bit; 8612 } 8613 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8614 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8615 %} 8616 ins_pipe( pipe_slow ); 8617 %} 8618 8619 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8620 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8621 match(Set dst (VectorStoreMask mask size)); 8622 effect(TEMP_DEF dst); 8623 format %{ "vector_store_mask $dst, $mask \t! 
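// Aside: a mask held in an opmask register has no byte lanes to narrow, so
// it is first rematerialized as a vector. Sketch of the path below (the
// vector_int_mask_cmp_bits table is assumed to hold int 1s, which is what
// the required 0/1 byte output implies):
//   dst = masked_load(int_ones_table, k)   // 1 where the k bit is set, else 0
//   dst = evpmovdb(dst)                    // narrow each int to one byte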
elem size is $size byte[s]" %} 8624 ins_encode %{ 8625 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8626 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8627 false, Assembler::AVX_512bit, noreg); 8628 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8629 %} 8630 ins_pipe( pipe_slow ); 8631 %} 8632 8633 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8634 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8635 match(Set dst (VectorStoreMask mask size)); 8636 effect(TEMP_DEF dst); 8637 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8638 ins_encode %{ 8639 int dst_vlen_enc = vector_length_encoding(this); 8640 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8641 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8642 %} 8643 ins_pipe( pipe_slow ); 8644 %} 8645 8646 instruct vmaskcast_evex(kReg dst) %{ 8647 match(Set dst (VectorMaskCast dst)); 8648 ins_cost(0); 8649 format %{ "vector_mask_cast $dst" %} 8650 ins_encode %{ 8651 // empty 8652 %} 8653 ins_pipe(empty); 8654 %} 8655 8656 instruct vmaskcast(vec dst) %{ 8657 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8658 match(Set dst (VectorMaskCast dst)); 8659 ins_cost(0); 8660 format %{ "vector_mask_cast $dst" %} 8661 ins_encode %{ 8662 // empty 8663 %} 8664 ins_pipe(empty); 8665 %} 8666 8667 instruct vmaskcast_avx(vec dst, vec src) %{ 8668 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8669 match(Set dst (VectorMaskCast src)); 8670 format %{ "vector_mask_cast $dst, $src" %} 8671 ins_encode %{ 8672 int vlen = Matcher::vector_length(this); 8673 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8674 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8675 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8676 %} 8677 ins_pipe(pipe_slow); 8678 %} 8679 8680 //-------------------------------- Load Iota Indices ---------------------------------- 8681 8682 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8683 match(Set dst (VectorLoadConst src)); 8684 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8685 ins_encode %{ 8686 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8687 BasicType bt = Matcher::vector_element_basic_type(this); 8688 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8689 %} 8690 ins_pipe( pipe_slow ); 8691 %} 8692 8693 #ifdef _LP64 8694 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8695 match(Set dst (PopulateIndex src1 src2)); 8696 effect(TEMP dst, TEMP vtmp); 8697 format %{ "vector_populate_index $dst $src1 $src2\t! 
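// Aside: PopulateIndex materializes {start, start+1, start+2, ...}. The
// encoding composes two primitives already used elsewhere in this file;
// conceptually:
//   vtmp = broadcast(src1)    // {s, s, s, s, ...}
//   dst  = iota               // {0, 1, 2, 3, ...}
//   dst  = dst + vtmp         // {s, s+1, s+2, s+3, ...}
// $src2 is the stride; only the constant 1 is matched (see the assert).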
using $vtmp as TEMP" %} 8698 ins_encode %{ 8699 assert($src2$$constant == 1, "required"); 8700 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8701 int vlen_enc = vector_length_encoding(this); 8702 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8703 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8704 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8705 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8706 %} 8707 ins_pipe( pipe_slow ); 8708 %} 8709 8710 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8711 match(Set dst (PopulateIndex src1 src2)); 8712 effect(TEMP dst, TEMP vtmp); 8713 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8714 ins_encode %{ 8715 assert($src2$$constant == 1, "required"); 8716 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8717 int vlen_enc = vector_length_encoding(this); 8718 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8719 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8720 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8721 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8722 %} 8723 ins_pipe( pipe_slow ); 8724 %} 8725 #endif 8726 //-------------------------------- Rearrange ---------------------------------- 8727 8728 // LoadShuffle/Rearrange for Byte 8729 8730 instruct loadShuffleB(vec dst) %{ 8731 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8732 match(Set dst (VectorLoadShuffle dst)); 8733 format %{ "vector_load_shuffle $dst, $dst" %} 8734 ins_encode %{ 8735 // empty 8736 %} 8737 ins_pipe( pipe_slow ); 8738 %} 8739 8740 instruct rearrangeB(vec dst, vec shuffle) %{ 8741 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8742 Matcher::vector_length(n) < 32); 8743 match(Set dst (VectorRearrange dst shuffle)); 8744 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8745 ins_encode %{ 8746 assert(UseSSE >= 4, "required"); 8747 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8748 %} 8749 ins_pipe( pipe_slow ); 8750 %} 8751 8752 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8753 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8754 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8755 match(Set dst (VectorRearrange src shuffle)); 8756 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8757 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
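// Aside: vpshufb permutes only within each 128-bit lane, so the 256-bit
// byte rearrange below assembles the cross-lane result from two in-lane
// shuffles plus a blend; per destination byte with source index i:
//   from_self  = vpshufb(src, shuffle)           // correct if i is in our lane
//   from_other = vpshufb(swap128(src), shuffle)  // correct if i is in the other lane
//   dst        = blend(from_self, from_other, i selects other lane)
// The vpaddb against the shuffle-mask table sets the blend's sign bit
// exactly for the other-lane indices.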
using $vtmp1, $vtmp2 as TEMP" %} 8758 ins_encode %{ 8759 assert(UseAVX >= 2, "required"); 8760 // Swap src into vtmp1 8761 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8762 // Shuffle swapped src to get entries from other 128 bit lane 8763 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8764 // Shuffle original src to get entries from self 128 bit lane 8765 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8766 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8767 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8768 // Perform the blend 8769 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8770 %} 8771 ins_pipe( pipe_slow ); 8772 %} 8773 8774 8775 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8776 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8777 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8778 match(Set dst (VectorRearrange src shuffle)); 8779 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8780 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8781 ins_encode %{ 8782 int vlen_enc = vector_length_encoding(this); 8783 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8784 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8785 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8786 %} 8787 ins_pipe( pipe_slow ); 8788 %} 8789 8790 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8791 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8792 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8793 match(Set dst (VectorRearrange src shuffle)); 8794 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8795 ins_encode %{ 8796 int vlen_enc = vector_length_encoding(this); 8797 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8798 %} 8799 ins_pipe( pipe_slow ); 8800 %} 8801 8802 // LoadShuffle/Rearrange for Short 8803 8804 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8805 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8806 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8807 match(Set dst (VectorLoadShuffle src)); 8808 effect(TEMP dst, TEMP vtmp); 8809 format %{ "vector_load_shuffle $dst, $src\t! 
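// Aside: these pre-AVX512BW targets only have a byte shuffle, so each short
// shuffle index is widened into a pair of byte indices. For an illustrative
// short index 3:
//   3 * 2              = 6        // byte index of the short's low byte
//   duplicate, then +1 = {6, 7}   // low and high byte of short #3
// The vector_short_shufflemask() constant supplies the +1 on the upper byte
// of each pair.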
using $vtmp as TEMP" %} 8810 ins_encode %{ 8811 // Create a byte shuffle mask from short shuffle mask 8812 // only byte shuffle instruction available on these platforms 8813 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8814 if (UseAVX == 0) { 8815 assert(vlen_in_bytes <= 16, "required"); 8816 // Multiply each shuffle by two to get byte index 8817 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8818 __ psllw($vtmp$$XMMRegister, 1); 8819 8820 // Duplicate to create 2 copies of byte index 8821 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8822 __ psllw($dst$$XMMRegister, 8); 8823 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8824 8825 // Add one to get alternate byte index 8826 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8827 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8828 } else { 8829 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8830 int vlen_enc = vector_length_encoding(this); 8831 // Multiply each shuffle by two to get byte index 8832 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8833 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8834 8835 // Duplicate to create 2 copies of byte index 8836 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8837 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8838 8839 // Add one to get alternate byte index 8840 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8841 } 8842 %} 8843 ins_pipe( pipe_slow ); 8844 %} 8845 8846 instruct rearrangeS(vec dst, vec shuffle) %{ 8847 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8848 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8849 match(Set dst (VectorRearrange dst shuffle)); 8850 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8851 ins_encode %{ 8852 assert(UseSSE >= 4, "required"); 8853 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8854 %} 8855 ins_pipe( pipe_slow ); 8856 %} 8857 8858 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8859 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8860 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8861 match(Set dst (VectorRearrange src shuffle)); 8862 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8863 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8864 ins_encode %{ 8865 assert(UseAVX >= 2, "required"); 8866 // Swap src into vtmp1 8867 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8868 // Shuffle swapped src to get entries from other 128 bit lane 8869 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8870 // Shuffle original src to get entries from self 128 bit lane 8871 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8872 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8873 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8874 // Perform the blend 8875 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8876 %} 8877 ins_pipe( pipe_slow ); 8878 %} 8879 8880 instruct loadShuffleS_evex(vec dst, vec src) %{ 8881 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8882 VM_Version::supports_avx512bw()); 8883 match(Set dst (VectorLoadShuffle src)); 8884 format %{ "vector_load_shuffle $dst, $src" %} 8885 ins_encode %{ 8886 int vlen_enc = vector_length_encoding(this); 8887 if (!VM_Version::supports_avx512vl()) { 8888 vlen_enc = Assembler::AVX_512bit; 8889 } 8890 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8891 %} 8892 ins_pipe( pipe_slow ); 8893 %} 8894 8895 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8896 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8897 VM_Version::supports_avx512bw()); 8898 match(Set dst (VectorRearrange src shuffle)); 8899 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8900 ins_encode %{ 8901 int vlen_enc = vector_length_encoding(this); 8902 if (!VM_Version::supports_avx512vl()) { 8903 vlen_enc = Assembler::AVX_512bit; 8904 } 8905 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8906 %} 8907 ins_pipe( pipe_slow ); 8908 %} 8909 8910 // LoadShuffle/Rearrange for Integer and Float 8911 8912 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8913 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8914 Matcher::vector_length(n) == 4 && UseAVX == 0); 8915 match(Set dst (VectorLoadShuffle src)); 8916 effect(TEMP dst, TEMP vtmp); 8917 format %{ "vector_load_shuffle $dst, $src\t! 
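// Aside: with AVX512BW (loadShuffleS_evex/rearrangeS_evex above) none of
// the index widening is needed: vpmovzxbw widens the byte indices to words
// and vpermw permutes shorts directly, across 128-bit lanes as well.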
using $vtmp as TEMP" %} 8918 ins_encode %{ 8919 assert(UseSSE >= 4, "required"); 8920 8921 // Create a byte shuffle mask from int shuffle mask 8922 // only byte shuffle instruction available on these platforms 8923 8924 // Duplicate and multiply each shuffle by 4 8925 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8926 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8927 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8928 __ psllw($vtmp$$XMMRegister, 2); 8929 8930 // Duplicate again to create 4 copies of byte index 8931 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8932 __ psllw($dst$$XMMRegister, 8); 8933 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8934 8935 // Add 3,2,1,0 to get alternate byte index 8936 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8937 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8938 %} 8939 ins_pipe( pipe_slow ); 8940 %} 8941 8942 instruct rearrangeI(vec dst, vec shuffle) %{ 8943 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8944 UseAVX == 0); 8945 match(Set dst (VectorRearrange dst shuffle)); 8946 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8947 ins_encode %{ 8948 assert(UseSSE >= 4, "required"); 8949 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8950 %} 8951 ins_pipe( pipe_slow ); 8952 %} 8953 8954 instruct loadShuffleI_avx(vec dst, vec src) %{ 8955 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8956 UseAVX > 0); 8957 match(Set dst (VectorLoadShuffle src)); 8958 format %{ "vector_load_shuffle $dst, $src" %} 8959 ins_encode %{ 8960 int vlen_enc = vector_length_encoding(this); 8961 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8962 %} 8963 ins_pipe( pipe_slow ); 8964 %} 8965 8966 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8967 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8968 UseAVX > 0); 8969 match(Set dst (VectorRearrange src shuffle)); 8970 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8971 ins_encode %{ 8972 int vlen_enc = vector_length_encoding(this); 8973 BasicType bt = Matcher::vector_element_basic_type(this); 8974 __ vector_rearrange_int_float(bt, $dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8975 %} 8976 ins_pipe( pipe_slow ); 8977 %} 8978 8979 // LoadShuffle/Rearrange for Long and Double 8980 8981 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8982 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8983 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8984 match(Set dst (VectorLoadShuffle src)); 8985 effect(TEMP dst, TEMP vtmp); 8986 format %{ "vector_load_shuffle $dst, $src\t! 
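// Aside: AVX2 has no variable qword permute, so a long shuffle index is
// widened into a pair of dword indices and vpermd does the work (see
// rearrangeL below). For an illustrative long index 1:
//   1 * 2              = 2        // dword index of the qword's low half
//   duplicate, then +1 = {2, 3}   // low and high dword of qword #1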
using $vtmp as TEMP" %} 8987 ins_encode %{ 8988 assert(UseAVX >= 2, "required"); 8989 8990 int vlen_enc = vector_length_encoding(this); 8991 // Create a double word shuffle mask from long shuffle mask 8992 // only double word shuffle instruction available on these platforms 8993 8994 // Multiply each shuffle by two to get double word index 8995 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8996 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8997 8998 // Duplicate each double word shuffle 8999 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 9000 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 9001 9002 // Add one to get alternate double word index 9003 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 9004 %} 9005 ins_pipe( pipe_slow ); 9006 %} 9007 9008 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 9009 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9010 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 9011 match(Set dst (VectorRearrange src shuffle)); 9012 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9013 ins_encode %{ 9014 assert(UseAVX >= 2, "required"); 9015 9016 int vlen_enc = vector_length_encoding(this); 9017 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9018 %} 9019 ins_pipe( pipe_slow ); 9020 %} 9021 9022 instruct loadShuffleL_evex(vec dst, vec src) %{ 9023 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9024 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9025 match(Set dst (VectorLoadShuffle src)); 9026 format %{ "vector_load_shuffle $dst, $src" %} 9027 ins_encode %{ 9028 assert(UseAVX > 2, "required"); 9029 9030 int vlen_enc = vector_length_encoding(this); 9031 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9032 %} 9033 ins_pipe( pipe_slow ); 9034 %} 9035 9036 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 9037 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 9038 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 9039 match(Set dst (VectorRearrange src shuffle)); 9040 format %{ "vector_rearrange $dst, $shuffle, $src" %} 9041 ins_encode %{ 9042 assert(UseAVX > 2, "required"); 9043 9044 int vlen_enc = vector_length_encoding(this); 9045 if (vlen_enc == Assembler::AVX_128bit) { 9046 vlen_enc = Assembler::AVX_256bit; 9047 } 9048 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 9049 %} 9050 ins_pipe( pipe_slow ); 9051 %} 9052 9053 // --------------------------------- FMA -------------------------------------- 9054 // a * b + c 9055 9056 instruct vfmaF_reg(vec a, vec b, vec c) %{ 9057 match(Set c (FmaVF c (Binary a b))); 9058 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9059 ins_cost(150); 9060 ins_encode %{ 9061 assert(UseFMA, "not enabled"); 9062 int vlen_enc = vector_length_encoding(this); 9063 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9064 %} 9065 ins_pipe( pipe_slow ); 9066 %} 9067 9068 instruct vfmaF_mem(vec a, memory b, vec c) %{ 9069 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9070 match(Set c (FmaVF c (Binary a (LoadVector b)))); 9071 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 9072 ins_cost(150); 9073 ins_encode %{ 9074 assert(UseFMA, "not 
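// Aside: FmaVF/FmaVD denote a true fused multiply-add: each lane computes
// round(a * b + c) with no intermediate rounding of a * b, which is why
// these rules are guarded by the UseFMA asserts rather than being expressed
// as a separate multiply and add.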
enabled"); 9075 int vlen_enc = vector_length_encoding(this); 9076 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9077 %} 9078 ins_pipe( pipe_slow ); 9079 %} 9080 9081 instruct vfmaD_reg(vec a, vec b, vec c) %{ 9082 match(Set c (FmaVD c (Binary a b))); 9083 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9084 ins_cost(150); 9085 ins_encode %{ 9086 assert(UseFMA, "not enabled"); 9087 int vlen_enc = vector_length_encoding(this); 9088 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 9089 %} 9090 ins_pipe( pipe_slow ); 9091 %} 9092 9093 instruct vfmaD_mem(vec a, memory b, vec c) %{ 9094 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 9095 match(Set c (FmaVD c (Binary a (LoadVector b)))); 9096 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 9097 ins_cost(150); 9098 ins_encode %{ 9099 assert(UseFMA, "not enabled"); 9100 int vlen_enc = vector_length_encoding(this); 9101 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 9102 %} 9103 ins_pipe( pipe_slow ); 9104 %} 9105 9106 // --------------------------------- Vector Multiply Add -------------------------------------- 9107 9108 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 9109 predicate(UseAVX == 0); 9110 match(Set dst (MulAddVS2VI dst src1)); 9111 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 9112 ins_encode %{ 9113 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 9114 %} 9115 ins_pipe( pipe_slow ); 9116 %} 9117 9118 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 9119 predicate(UseAVX > 0); 9120 match(Set dst (MulAddVS2VI src1 src2)); 9121 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 9122 ins_encode %{ 9123 int vlen_enc = vector_length_encoding(this); 9124 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9125 %} 9126 ins_pipe( pipe_slow ); 9127 %} 9128 9129 // --------------------------------- Vector Multiply Add Add ---------------------------------- 9130 9131 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 9132 predicate(VM_Version::supports_avx512_vnni()); 9133 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 9134 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 9135 ins_encode %{ 9136 assert(UseAVX > 2, "required"); 9137 int vlen_enc = vector_length_encoding(this); 9138 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 9139 %} 9140 ins_pipe( pipe_slow ); 9141 ins_cost(10); 9142 %} 9143 9144 // --------------------------------- PopCount -------------------------------------- 9145 9146 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 9147 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9148 match(Set dst (PopCountVI src)); 9149 match(Set dst (PopCountVL src)); 9150 format %{ "vector_popcount_integral $dst, $src" %} 9151 ins_encode %{ 9152 int opcode = this->ideal_Opcode(); 9153 int vlen_enc = vector_length_encoding(this, $src); 9154 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9155 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 9156 %} 9157 ins_pipe( pipe_slow ); 9158 %} 9159 9160 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9161 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9162 match(Set dst (PopCountVI src mask)); 9163 match(Set dst (PopCountVL src mask)); 9164 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 9165 ins_encode %{ 9166 int vlen_enc = vector_length_encoding(this, $src); 9167 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9168 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9169 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 9170 %} 9171 ins_pipe( pipe_slow ); 9172 %} 9173 9174 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 9175 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 9176 match(Set dst (PopCountVI src)); 9177 match(Set dst (PopCountVL src)); 9178 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9179 format %{ "vector_popcount_integral $dst, $src\t! 
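// Aside: without the AVX512 popcount extensions the helper called below has
// to synthesize the count. A common scheme (assumed here; the exact
// sequence lives in C2_MacroAssembler) is a pshufb nibble lookup,
//   cnt(byte) = LUT[byte & 0xF] + LUT[byte >> 4]
// followed by horizontal accumulation up to the element width.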
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 9180 ins_encode %{ 9181 int opcode = this->ideal_Opcode(); 9182 int vlen_enc = vector_length_encoding(this, $src); 9183 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9184 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9185 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 9186 %} 9187 ins_pipe( pipe_slow ); 9188 %} 9189 9190 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 9191 9192 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 9193 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9194 Matcher::vector_length_in_bytes(n->in(1)))); 9195 match(Set dst (CountTrailingZerosV src)); 9196 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9197 ins_cost(400); 9198 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 9199 ins_encode %{ 9200 int vlen_enc = vector_length_encoding(this, $src); 9201 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9202 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9203 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9204 %} 9205 ins_pipe( pipe_slow ); 9206 %} 9207 9208 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9209 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9210 VM_Version::supports_avx512cd() && 9211 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9212 match(Set dst (CountTrailingZerosV src)); 9213 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9214 ins_cost(400); 9215 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 9216 ins_encode %{ 9217 int vlen_enc = vector_length_encoding(this, $src); 9218 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9219 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9220 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 9221 %} 9222 ins_pipe( pipe_slow ); 9223 %} 9224 9225 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 9226 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9227 match(Set dst (CountTrailingZerosV src)); 9228 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 9229 ins_cost(400); 9230 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 9231 ins_encode %{ 9232 int vlen_enc = vector_length_encoding(this, $src); 9233 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9234 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9235 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 9236 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 9237 %} 9238 ins_pipe( pipe_slow ); 9239 %} 9240 9241 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9242 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9243 match(Set dst (CountTrailingZerosV src)); 9244 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
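// Aside: pre-AVX512CD there is no direct vector trailing-zero count, so the
// helper synthesizes it. One standard reduction (illustrative only; see
// C2_MacroAssembler for the emitted sequence) isolates the lowest set bit:
//   tz(x) = popcount((x & -x) - 1)   // x=0b1100 -> 0b0100 -> 0b0011 -> 2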
9245 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9246 ins_encode %{ 9247 int vlen_enc = vector_length_encoding(this, $src); 9248 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9249 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9250 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9251 %} 9252 ins_pipe( pipe_slow ); 9253 %} 9254 9255 9256 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 9257 9258 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 9259 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 9260 effect(TEMP dst); 9261 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9262 ins_encode %{ 9263 int vector_len = vector_length_encoding(this); 9264 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 9265 %} 9266 ins_pipe( pipe_slow ); 9267 %} 9268 9269 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 9270 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 9271 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 9272 effect(TEMP dst); 9273 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 9274 ins_encode %{ 9275 int vector_len = vector_length_encoding(this); 9276 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 9277 %} 9278 ins_pipe( pipe_slow ); 9279 %} 9280 9281 // --------------------------------- Rotation Operations ---------------------------------- 9282 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 9283 match(Set dst (RotateLeftV src shift)); 9284 match(Set dst (RotateRightV src shift)); 9285 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 9286 ins_encode %{ 9287 int opcode = this->ideal_Opcode(); 9288 int vector_len = vector_length_encoding(this); 9289 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9290 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9291 %} 9292 ins_pipe( pipe_slow ); 9293 %} 9294 9295 instruct vprorate(vec dst, vec src, vec shift) %{ 9296 match(Set dst (RotateLeftV src shift)); 9297 match(Set dst (RotateRightV src shift)); 9298 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9299 ins_encode %{ 9300 int opcode = this->ideal_Opcode(); 9301 int vector_len = vector_length_encoding(this); 9302 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9303 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9304 %} 9305 ins_pipe( pipe_slow ); 9306 %} 9307 9308 // ---------------------------------- Masked Operations ------------------------------------ 9309 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9310 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9311 match(Set dst (LoadVectorMasked mem mask)); 9312 format %{ "vector_masked_load $dst, $mem, $mask \t! 
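// Aside: the two masked-load flavours are split on the mask's type. With a
// vector mask (AVX/AVX2, this rule) each lane's sign bit gates the load;
// with an opmask (EVEX rule below) the k-register bits gate it, and merge
// is false. Either way masked-off lanes come back zero, not undefined,
// which is what LoadVectorMasked requires.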
vector masked copy" %} 9313 ins_encode %{ 9314 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9315 int vlen_enc = vector_length_encoding(this); 9316 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9317 %} 9318 ins_pipe( pipe_slow ); 9319 %} 9320 9321 9322 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9323 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9324 match(Set dst (LoadVectorMasked mem mask)); 9325 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9326 ins_encode %{ 9327 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9328 int vector_len = vector_length_encoding(this); 9329 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9330 %} 9331 ins_pipe( pipe_slow ); 9332 %} 9333 9334 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9335 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9336 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9337 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9338 ins_encode %{ 9339 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9340 int vlen_enc = vector_length_encoding(src_node); 9341 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9342 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9343 %} 9344 ins_pipe( pipe_slow ); 9345 %} 9346 9347 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9348 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9349 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9350 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9351 ins_encode %{ 9352 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9353 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9354 int vlen_enc = vector_length_encoding(src_node); 9355 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9356 %} 9357 ins_pipe( pipe_slow ); 9358 %} 9359 9360 #ifdef _LP64 9361 instruct verify_vector_alignment(rRegP addr, immL32 mask, rFlagsReg cr) %{ 9362 match(Set addr (VerifyVectorAlignment addr mask)); 9363 effect(KILL cr); 9364 format %{ "verify_vector_alignment $addr $mask \t! verify alignment" %} 9365 ins_encode %{ 9366 Label Lskip; 9367 // check if masked bits of addr are zero 9368 __ testq($addr$$Register, $mask$$constant); 9369 __ jccb(Assembler::equal, Lskip); 9370 __ stop("verify_vector_alignment found a misaligned vector memory access"); 9371 __ bind(Lskip); 9372 %} 9373 ins_pipe(pipe_slow); 9374 %} 9375 9376 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9377 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9378 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9379 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
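// Aside: VectorCmpMasked answers "do src1 and src2 agree on every selected
// lane, and if not, where is the first difference?". The encoding below is
// branch-free except for one jccb:
//   ktmp1 = (src1 == src2) under $mask    // per-lane equality
//   ktmp2 = ~$mask                        // don't-care lanes
//   kortest: CF <=> (ktmp1 | ktmp2) all ones, i.e. no real mismatch
//   otherwise: dst = tzcnt(~ktmp1)        // index of first differing lane
// dst is preloaded with -1 so the all-match path needs no extra move.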
vector mask comparison" %} 9380 ins_encode %{ 9381 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9382 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9383 9384 Label DONE; 9385 int vlen_enc = vector_length_encoding(this, $src1); 9386 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9387 9388 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9389 __ mov64($dst$$Register, -1L); 9390 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9391 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9392 __ jccb(Assembler::carrySet, DONE); 9393 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9394 __ notq($dst$$Register); 9395 __ tzcntq($dst$$Register, $dst$$Register); 9396 __ bind(DONE); 9397 %} 9398 ins_pipe( pipe_slow ); 9399 %} 9400 9401 9402 instruct vmask_gen(kReg dst, rRegL len, rRegL temp, rFlagsReg cr) %{ 9403 match(Set dst (VectorMaskGen len)); 9404 effect(TEMP temp, KILL cr); 9405 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9406 ins_encode %{ 9407 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9408 %} 9409 ins_pipe( pipe_slow ); 9410 %} 9411 9412 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9413 match(Set dst (VectorMaskGen len)); 9414 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9415 effect(TEMP temp); 9416 ins_encode %{ 9417 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9418 __ kmovql($dst$$KRegister, $temp$$Register); 9419 %} 9420 ins_pipe( pipe_slow ); 9421 %} 9422 9423 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9424 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9425 match(Set dst (VectorMaskToLong mask)); 9426 effect(TEMP dst, KILL cr); 9427 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9428 ins_encode %{ 9429 int opcode = this->ideal_Opcode(); 9430 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9431 int mask_len = Matcher::vector_length(this, $mask); 9432 int mask_size = mask_len * type2aelembytes(mbt); 9433 int vlen_enc = vector_length_encoding(this, $mask); 9434 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9435 $dst$$Register, mask_len, mask_size, vlen_enc); 9436 %} 9437 ins_pipe( pipe_slow ); 9438 %} 9439 9440 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9441 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9442 match(Set dst (VectorMaskToLong mask)); 9443 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9444 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9445 ins_encode %{ 9446 int opcode = this->ideal_Opcode(); 9447 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9448 int mask_len = Matcher::vector_length(this, $mask); 9449 int vlen_enc = vector_length_encoding(this, $mask); 9450 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9451 $dst$$Register, mask_len, mbt, vlen_enc); 9452 %} 9453 ins_pipe( pipe_slow ); 9454 %} 9455 9456 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9457 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9458 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9459 format %{ "vector_tolong_avx $dst, $mask \t! 
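// Aside: the tolong/truecount/first/last family all funnel into one
// vector_mask_operation helper keyed by the ideal opcode. For the
// boolean-vector forms the mask is typically condensed to one bit per lane
// (vpmovmskb-style) first, after which tolong is the bit pattern itself and
// truecount is its popcount.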
using $xtmp as TEMP" %} 9460 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9461 ins_encode %{ 9462 int opcode = this->ideal_Opcode(); 9463 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9464 int mask_len = Matcher::vector_length(this, $mask); 9465 int vlen_enc = vector_length_encoding(this, $mask); 9466 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9467 $dst$$Register, mask_len, mbt, vlen_enc); 9468 %} 9469 ins_pipe( pipe_slow ); 9470 %} 9471 9472 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9473 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9474 match(Set dst (VectorMaskTrueCount mask)); 9475 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9476 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9477 ins_encode %{ 9478 int opcode = this->ideal_Opcode(); 9479 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9480 int mask_len = Matcher::vector_length(this, $mask); 9481 int mask_size = mask_len * type2aelembytes(mbt); 9482 int vlen_enc = vector_length_encoding(this, $mask); 9483 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9484 $tmp$$Register, mask_len, mask_size, vlen_enc); 9485 %} 9486 ins_pipe( pipe_slow ); 9487 %} 9488 9489 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9490 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9491 match(Set dst (VectorMaskTrueCount mask)); 9492 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9493 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9494 ins_encode %{ 9495 int opcode = this->ideal_Opcode(); 9496 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9497 int mask_len = Matcher::vector_length(this, $mask); 9498 int vlen_enc = vector_length_encoding(this, $mask); 9499 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9500 $tmp$$Register, mask_len, mbt, vlen_enc); 9501 %} 9502 ins_pipe( pipe_slow ); 9503 %} 9504 9505 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9506 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9507 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9508 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9509 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9510 ins_encode %{ 9511 int opcode = this->ideal_Opcode(); 9512 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9513 int mask_len = Matcher::vector_length(this, $mask); 9514 int vlen_enc = vector_length_encoding(this, $mask); 9515 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9516 $tmp$$Register, mask_len, mbt, vlen_enc); 9517 %} 9518 ins_pipe( pipe_slow ); 9519 %} 9520 9521 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9522 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9523 match(Set dst (VectorMaskFirstTrue mask)); 9524 match(Set dst (VectorMaskLastTrue mask)); 9525 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9526 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
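// Aside: first-true and last-true differ only in the ideal opcode handed to
// the helper; once the mask bits sit in a GPR the idea is simply
//   firsttrue = tzcnt(bits)        // VLENGTH when no bit is set
//   lasttrue  = 63 - lzcnt(bits)   // -1 when no bit is set
// (an illustrative sketch; out-of-range results follow the Vector API spec).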
using $tmp as TEMP" %} 9527 ins_encode %{ 9528 int opcode = this->ideal_Opcode(); 9529 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9530 int mask_len = Matcher::vector_length(this, $mask); 9531 int mask_size = mask_len * type2aelembytes(mbt); 9532 int vlen_enc = vector_length_encoding(this, $mask); 9533 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9534 $tmp$$Register, mask_len, mask_size, vlen_enc); 9535 %} 9536 ins_pipe( pipe_slow ); 9537 %} 9538 9539 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9540 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr); 9541 match(Set dst (VectorMaskFirstTrue mask)); 9542 match(Set dst (VectorMaskLastTrue mask)); 9543 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9544 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9545 ins_encode %{ 9546 int opcode = this->ideal_Opcode(); 9547 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9548 int mask_len = Matcher::vector_length(this, $mask); 9549 int vlen_enc = vector_length_encoding(this, $mask); 9550 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9551 $tmp$$Register, mask_len, mbt, vlen_enc); 9552 %} 9553 ins_pipe( pipe_slow ); 9554 %} 9555 9556 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9557 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr); 9558 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9559 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9560 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9561 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9562 ins_encode %{ 9563 int opcode = this->ideal_Opcode(); 9564 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9565 int mask_len = Matcher::vector_length(this, $mask); 9566 int vlen_enc = vector_length_encoding(this, $mask); 9567 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9568 $tmp$$Register, mask_len, mbt, vlen_enc); 9569 %} 9570 ins_pipe( pipe_slow ); 9571 %} 9572 9573 // --------------------------------- Compress/Expand Operations --------------------------- 9574 #ifdef _LP64 9575 instruct vcompress_reg_avx(vec dst, vec src, vec mask, rRegI rtmp, rRegL rscratch, vec perm, vec xtmp, rFlagsReg cr) %{ 9576 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 9577 match(Set dst (CompressV src mask)); 9578 match(Set dst (ExpandV src mask)); 9579 effect(TEMP_DEF dst, TEMP perm, TEMP xtmp, TEMP rtmp, TEMP rscratch, KILL cr); 9580 format %{ "vector_compress $dst, $src, $mask \t!using $xtmp, $rtmp, $rscratch and $perm as TEMP" %} 9581 ins_encode %{ 9582 int opcode = this->ideal_Opcode(); 9583 int vlen_enc = vector_length_encoding(this); 9584 BasicType bt = Matcher::vector_element_basic_type(this); 9585 __ vector_compress_expand_avx2(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$XMMRegister, $rtmp$$Register, 9586 $rscratch$$Register, $perm$$XMMRegister, $xtmp$$XMMRegister, bt, vlen_enc); 9587 %} 9588 ins_pipe( pipe_slow ); 9589 %} 9590 #endif 9591 9592 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9593 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 9594 match(Set dst (CompressV src mask)); 9595 match(Set dst (ExpandV src mask)); 9596 format %{ "vector_compress_expand $dst, $src, $mask" %} 9597 ins_encode %{ 9598 int opcode = this->ideal_Opcode(); 9599 int vector_len = vector_length_encoding(this); 9600 BasicType bt = Matcher::vector_element_basic_type(this); 9601 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9602 %} 9603 ins_pipe( pipe_slow ); 9604 %} 9605 9606 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9607 match(Set dst (CompressM mask)); 9608 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9609 format %{ "mask_compress_evex $dst, $mask\t! 
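// Aside: CompressM compresses the mask using itself as the selector, so the
// result is always the popcount(mask) lowest bits set, e.g.
//   mask = 0b1010  ->  dst = 0b0011
// which is exactly the mask describing the lanes a CompressV with the same
// mask actually writes.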
using $rtmp1 and $rtmp2 as TEMP" %} 9610 ins_encode %{ 9611 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9612 int mask_len = Matcher::vector_length(this); 9613 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9614 %} 9615 ins_pipe( pipe_slow ); 9616 %} 9617 9618 #endif // _LP64 9619 9620 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9621 9622 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9623 predicate(!VM_Version::supports_gfni()); 9624 match(Set dst (ReverseV src)); 9625 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9626 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9627 ins_encode %{ 9628 int vec_enc = vector_length_encoding(this); 9629 BasicType bt = Matcher::vector_element_basic_type(this); 9630 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9631 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9632 %} 9633 ins_pipe( pipe_slow ); 9634 %} 9635 9636 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9637 predicate(VM_Version::supports_gfni()); 9638 match(Set dst (ReverseV src)); 9639 effect(TEMP dst, TEMP xtmp); 9640 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9641 ins_encode %{ 9642 int vec_enc = vector_length_encoding(this); 9643 BasicType bt = Matcher::vector_element_basic_type(this); 9644 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9645 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9646 $xtmp$$XMMRegister); 9647 %} 9648 ins_pipe( pipe_slow ); 9649 %} 9650 9651 instruct vreverse_byte_reg(vec dst, vec src) %{ 9652 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9653 match(Set dst (ReverseBytesV src)); 9654 effect(TEMP dst); 9655 format %{ "vector_reverse_byte $dst, $src" %} 9656 ins_encode %{ 9657 int vec_enc = vector_length_encoding(this); 9658 BasicType bt = Matcher::vector_element_basic_type(this); 9659 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9660 %} 9661 ins_pipe( pipe_slow ); 9662 %} 9663 9664 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9665 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9666 match(Set dst (ReverseBytesV src)); 9667 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9668 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9669 ins_encode %{ 9670 int vec_enc = vector_length_encoding(this); 9671 BasicType bt = Matcher::vector_element_basic_type(this); 9672 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9673 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9674 %} 9675 ins_pipe( pipe_slow ); 9676 %} 9677 9678 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9679 9680 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9681 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9682 Matcher::vector_length_in_bytes(n->in(1)))); 9683 match(Set dst (CountLeadingZerosV src)); 9684 format %{ "vector_count_leading_zeros $dst, $src" %} 9685 ins_encode %{ 9686 int vlen_enc = vector_length_encoding(this, $src); 9687 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9688 __ 
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9689 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9690 %} 9691 ins_pipe( pipe_slow ); 9692 %} 9693 9694 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9695 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9696 Matcher::vector_length_in_bytes(n->in(1)))); 9697 match(Set dst (CountLeadingZerosV src mask)); 9698 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9699 ins_encode %{ 9700 int vlen_enc = vector_length_encoding(this, $src); 9701 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9702 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9703 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9704 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9705 %} 9706 ins_pipe( pipe_slow ); 9707 %} 9708 9709 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9710 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9711 VM_Version::supports_avx512cd() && 9712 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9713 match(Set dst (CountLeadingZerosV src)); 9714 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9715 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9716 ins_encode %{ 9717 int vlen_enc = vector_length_encoding(this, $src); 9718 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9719 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9720 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9721 %} 9722 ins_pipe( pipe_slow ); 9723 %} 9724 9725 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9726 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9727 match(Set dst (CountLeadingZerosV src)); 9728 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9729 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9730 ins_encode %{ 9731 int vlen_enc = vector_length_encoding(this, $src); 9732 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9733 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9734 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9735 $rtmp$$Register, true, vlen_enc); 9736 %} 9737 ins_pipe( pipe_slow ); 9738 %} 9739 9740 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9741 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9742 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9743 match(Set dst (CountLeadingZerosV src)); 9744 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9745 format %{ "vector_count_leading_zeros $dst, $src\t! 
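// Aside: the T_INT AVX fallback is the only leading-zero variant that needs
// no scratch GPR (note the noreg below). A well-known trick (assumed here)
// reads the count out of the float exponent:
//   lz(x) = 31 - (exponent(int_to_float(x)) - 127)   // for x > 0
// with zero and negative inputs special-cased.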

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------
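
// The masked rules in this section all funnel into a single macro-assembler
// helper, evmasked_op, which dispatches on the ideal opcode, element type and
// vector-length encoding and emits the matching EVEX instruction under the
// k-register predicate. Most rules pass 'true' to request merge-masking, so a
// minimal per-lane sketch of a binary rule is:
//
//   for (int i = 0; i < vlen; i++)
//     dst[i] = mask[i] ? (dst[i] OP src2[i]) : dst[i];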

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (XorV (Binary dst src2) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
  format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (OrV (Binary dst src2) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
  format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AndV (Binary dst src2) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
  format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
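
// Each masked binary op comes as a register/register rule plus a memory rule
// that folds the LoadVector into the EVEX instruction's memory operand, so
// the second source is read straight from memory.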

instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst src2) mask));
  match(Set dst (SubVS (Binary dst src2) mask));
  match(Set dst (SubVI (Binary dst src2) mask));
  match(Set dst (SubVL (Binary dst src2) mask));
  match(Set dst (SubVF (Binary dst src2) mask));
  match(Set dst (SubVD (Binary dst src2) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst src2) mask));
  match(Set dst (MulVI (Binary dst src2) mask));
  match(Set dst (MulVL (Binary dst src2) mask));
  match(Set dst (MulVF (Binary dst src2) mask));
  match(Set dst (MulVD (Binary dst src2) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
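
// Unary masked ops reuse the binary evmasked_op entry point by passing $dst
// in both source positions; sqrt is matched for float and double lanes only.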

instruct vsqrt_reg_masked(vec dst, kReg mask) %{
  match(Set dst (SqrtVF dst mask));
  match(Set dst (SqrtVD dst mask));
  format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst src2) mask));
  match(Set dst (DivVD (Binary dst src2) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
  match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
  format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst shift) mask));
  match(Set dst (RotateRightV (Binary dst shift) mask));
  format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (RotateLeftV (Binary dst src2) mask));
  match(Set dst (RotateRightV (Binary dst src2) mask));
  format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
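
// Masked shifts come in three flavors: an immediate count (wrapped in a
// ShiftCntV node), a uniform count held in a vector register, and a per-lane
// "variable" count selected by is_var_shift(). As I read it, the extra
// trailing boolean passed to evmasked_op below picks between the uniform
// (vpsll/vpsrl/vpsra) and the variable (vpsllv/vpsrlv/vpsrav) encodings.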

instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
  match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
  format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (LShiftVS (Binary dst src2) mask));
  match(Set dst (LShiftVI (Binary dst src2) mask));
  match(Set dst (LShiftVL (Binary dst src2) mask));
  format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (RShiftVS (Binary dst src2) mask));
  match(Set dst (RShiftVI (Binary dst src2) mask));
  match(Set dst (RShiftVL (Binary dst src2) mask));
  format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
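
// RShiftV* above is the arithmetic, sign-propagating right shift; the
// URShiftV* rules below are the logical, zero-filling variant. Per set lane,
// roughly:
//
//   dst[i] = ((unsigned_lane_t) dst[i]) >> shift;   // URShift shifts in zeroes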

instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
  match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
  match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
  format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false);
  %}
  ins_pipe( pipe_slow );
%}

instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst (URShiftVS (Binary dst src2) mask));
  match(Set dst (URShiftVI (Binary dst src2) mask));
  match(Set dst (URShiftVL (Binary dst src2) mask));
  format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true);
  %}
  ins_pipe( pipe_slow );
%}
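
// Masked min/max follow the same merge pattern: each set lane becomes the
// lane-wise maximum (resp. minimum) of $dst and $src2, clear lanes keep $dst.
// For float/double lanes the NaN and -0.0 handling is whatever the ideal
// MaxV/MinV node's contract prescribes; nothing extra is done in these rules.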

instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst src2) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (MinV (Binary dst src2) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
  match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
  format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (VectorRearrange (Binary dst src2) mask));
  format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    // Note: unlike the arithmetic rules above, this passes 'false' (no merge)
    // to evmasked_op.
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabs_masked(vec dst, kReg mask) %{
  match(Set dst (AbsVB dst mask));
  match(Set dst (AbsVS dst mask));
  match(Set dst (AbsVI dst mask));
  match(Set dst (AbsVL dst mask));
  format %{ "vabs_masked $dst, $mask\t! vabs masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
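
// Masked fused multiply-add over float/double lanes. Set lanes get a
// single-rounding fma of $dst, $src2 and $src3 (the exact operand roles
// follow the FmaV node's inputs); clear lanes keep $dst. UseFMA is asserted
// at encode time, and the memory flavor folds the third operand's load.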

instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask\t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
  match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
  match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
  format %{ "vfma_masked $dst, $src2, $src3, $mask\t! vfma masked operation" %}
  ins_encode %{
    assert(UseFMA, "Needs FMA instructions support.");
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{
  match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
  format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Per-lane comparison; dispatch on the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
        Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
        __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_FLOAT: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      case T_DOUBLE: {
        Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
        __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt)); break;
    }
  %}
  ins_pipe( pipe_slow );
%}
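
// Mask materialization and conversion. MaskAll broadcasts a scalar boolean
// (0 or -1) across all mask lanes; XorVMask src (MaskAll -1), i.e. mask
// negation, is strength-reduced to knot below; VectorLongToMask moves the low
// mask_len bits of a long into either a k-register (true predicate targets)
// or a byte-vector mask. The LE32 rule below presumably pairs with a 64-lane
// variant defined elsewhere in this file.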
"%s", type2name(src1_elem_bt)); break; 10324 } 10325 %} 10326 ins_pipe( pipe_slow ); 10327 %} 10328 10329 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10330 predicate(Matcher::vector_length(n) <= 32); 10331 match(Set dst (MaskAll src)); 10332 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 10333 ins_encode %{ 10334 int mask_len = Matcher::vector_length(this); 10335 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 10336 %} 10337 ins_pipe( pipe_slow ); 10338 %} 10339 10340 #ifdef _LP64 10341 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 10342 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 10343 match(Set dst (XorVMask src (MaskAll cnt))); 10344 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 10345 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 10346 ins_encode %{ 10347 uint masklen = Matcher::vector_length(this); 10348 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 10349 %} 10350 ins_pipe( pipe_slow ); 10351 %} 10352 10353 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 10354 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 10355 (Matcher::vector_length(n) == 16) || 10356 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 10357 match(Set dst (XorVMask src (MaskAll cnt))); 10358 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 10359 ins_encode %{ 10360 uint masklen = Matcher::vector_length(this); 10361 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 10362 %} 10363 ins_pipe( pipe_slow ); 10364 %} 10365 10366 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 10367 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8); 10368 match(Set dst (VectorLongToMask src)); 10369 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 10370 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 10371 ins_encode %{ 10372 int mask_len = Matcher::vector_length(this); 10373 int vec_enc = vector_length_encoding(mask_len); 10374 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10375 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 10376 %} 10377 ins_pipe( pipe_slow ); 10378 %} 10379 10380 10381 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 10382 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8); 10383 match(Set dst (VectorLongToMask src)); 10384 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 10385 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 10386 ins_encode %{ 10387 int mask_len = Matcher::vector_length(this); 10388 assert(mask_len <= 32, "invalid mask length"); 10389 int vec_enc = vector_length_encoding(mask_len); 10390 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 10391 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 10392 %} 10393 ins_pipe( pipe_slow ); 10394 %} 10395 10396 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 10397 predicate(n->bottom_type()->isa_vectmask()); 10398 match(Set dst (VectorLongToMask src)); 10399 format %{ "long_to_mask_evex $dst, $src\t!" 

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(Type::equals(mask1->bottom_type(), mask2->bottom_type()), "Mask types must be equal");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
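
// CastVV nodes exist only to carry vector-type information for the register
// allocator; the value stays in the same register, so all three flavors
// (mask, vec, legVec) are zero-size, zero-cost no-ops.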

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

// IsInfinite checks: vfpclass with imm8 0x18 selects the +Inf (bit 3) and
// -Inf (bit 4) classes; the one-bit k-register result is then moved to a GPR.
instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}