//
// Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers or 16 words each, labeled (a)-p.
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//   XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//   XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
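//
// As a worked example (an editorial illustration of the fields described
// above, not additional ABI documentation), the first definition below,
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//
// declares slot XMM0 as Save-On-Call for both the register allocator and the
// C calling convention, spills/fills it as a float (Op_RegF), assigns it
// hardware encoding 0, and binds it to the first 32-bit VMReg slot of xmm0;
// the companion XMM0b-XMM0p definitions name the remaining fifteen 32-bit
// slots of the same 512-bit register.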
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
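// For example, the vector_length_encoding() helpers declared below are placed in
// this source_hpp block so that, per the note above, they can also be used
// outside the ad-scope.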
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // number of relocations needed by a call trampoline stub 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(CodeBuffer &cbuf); 1191 static int emit_deopt_handler(CodeBuffer& cbuf); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // exception handler starts out as jump and can be patched to 1196 // a call be deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5 byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // exception handler starts out as jump and can be patched to 1211 // a call be deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch(bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_unsigned_booltest_pred(int bt) { 1250 return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare); 1251 } 1252 1253 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1254 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1255 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1256 } 1257 1258 class Node::PD { 1259 public: 1260 enum NodeFlags { 1261 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1262 _last_flag = Flag_intel_jcc_erratum 1263 }; 1264 }; 1265 1266 %} // end source_hpp 1267 1268 source %{ 1269 1270 #include "opto/addnode.hpp" 1271 #include "c2_intelJccErratum_x86.hpp" 1272 1273 void 
PhaseOutput::pd_perform_mach_node_analysis() { 1274 if (VM_Version::has_intel_jcc_erratum()) { 1275 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1276 _buf_sizes._code += extra_padding; 1277 } 1278 } 1279 1280 int MachNode::pd_alignment_required() const { 1281 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1282 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1283 return IntelJccErratum::largest_jcc_size() + 1; 1284 } else { 1285 return 1; 1286 } 1287 } 1288 1289 int MachNode::compute_padding(int current_offset) const { 1290 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1291 Compile* C = Compile::current(); 1292 PhaseOutput* output = C->output(); 1293 Block* block = output->block(); 1294 int index = output->index(); 1295 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1296 } else { 1297 return 0; 1298 } 1299 } 1300 1301 // Emit exception handler code. 1302 // Stuff framesize into a register and call a VM stub routine. 1303 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1304 1305 // Note that the code buffer's insts_mark is always relative to insts. 1306 // That's why we must use the macroassembler to generate a handler. 1307 C2_MacroAssembler _masm(&cbuf); 1308 address base = __ start_a_stub(size_exception_handler()); 1309 if (base == NULL) { 1310 ciEnv::current()->record_failure("CodeCache is full"); 1311 return 0; // CodeBuffer::expand failed 1312 } 1313 int offset = __ offset(); 1314 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1315 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1316 __ end_a_stub(); 1317 return offset; 1318 } 1319 1320 // Emit deopt handler code. 1321 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1322 1323 // Note that the code buffer's insts_mark is always relative to insts. 1324 // That's why we must use the macroassembler to generate a handler. 1325 C2_MacroAssembler _masm(&cbuf); 1326 address base = __ start_a_stub(size_deopt_handler()); 1327 if (base == NULL) { 1328 ciEnv::current()->record_failure("CodeCache is full"); 1329 return 0; // CodeBuffer::expand failed 1330 } 1331 int offset = __ offset(); 1332 1333 #ifdef _LP64 1334 address the_pc = (address) __ pc(); 1335 Label next; 1336 // push a "the_pc" on the stack without destroying any registers 1337 // as they all may be live. 1338 1339 // push address of "next" 1340 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1341 __ bind(next); 1342 // adjust it so it matches "the_pc" 1343 __ subptr(Address(rsp, 0), __ offset() - offset); 1344 #else 1345 InternalAddress here(__ pc()); 1346 __ pushptr(here.addr()); 1347 #endif 1348 1349 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1350 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1351 __ end_a_stub(); 1352 return offset; 1353 } 1354 1355 Assembler::Width widthForType(BasicType bt) { 1356 if (bt == T_BYTE) { 1357 return Assembler::B; 1358 } else if (bt == T_SHORT) { 1359 return Assembler::W; 1360 } else if (bt == T_INT) { 1361 return Assembler::D; 1362 } else { 1363 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1364 return Assembler::Q; 1365 } 1366 } 1367 1368 //============================================================================= 1369 1370 // Float masks come from different places depending on platform. 
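// On 64-bit they resolve to StubRoutines::x86 stub addresses, on 32-bit to the
// *_pool constants (see the #ifdef below). Typical, illustrative use: an
// absolute-value rule ANDs with float_signmask()/double_signmask() to clear the
// sign bit, while a negation rule XORs with float_signflip()/double_signflip().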
1371 #ifdef _LP64 1372 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1373 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1374 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1375 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1376 #else 1377 static address float_signmask() { return (address)float_signmask_pool; } 1378 static address float_signflip() { return (address)float_signflip_pool; } 1379 static address double_signmask() { return (address)double_signmask_pool; } 1380 static address double_signflip() { return (address)double_signflip_pool; } 1381 #endif 1382 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1383 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1384 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1385 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1386 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1387 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1388 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1389 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1390 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1391 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1392 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1393 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1394 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1395 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1396 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1397 1398 //============================================================================= 1399 const bool Matcher::match_rule_supported(int opcode) { 1400 if (!has_match_rule(opcode)) { 1401 return false; // no match rule present 1402 } 1403 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1404 switch (opcode) { 1405 case Op_AbsVL: 1406 case Op_StoreVectorScatter: 1407 if (UseAVX < 3) { 1408 return false; 1409 } 1410 break; 1411 case Op_PopCountI: 1412 case Op_PopCountL: 1413 if (!UsePopCountInstruction) { 1414 return false; 1415 } 1416 break; 1417 case Op_PopCountVI: 1418 if (UseAVX < 2) { 1419 return false; 1420 } 1421 break; 1422 case Op_PopCountVL: 1423 if (UseAVX < 2) { 1424 return false; 1425 } 1426 break; 1427 case Op_MulVI: 1428 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1429 return false; 1430 } 1431 break; 1432 case Op_MulVL: 1433 if (UseSSE < 4) { // only with SSE4_1 or AVX 1434 return false; 1435 } 1436 break; 1437 case Op_MulReductionVL: 1438 if (VM_Version::supports_avx512dq() == false) { 1439 return false; 1440 } 1441 break; 1442 case Op_AddReductionVL: 1443 if (UseSSE < 2) { // requires at least SSE2 1444 return false; 1445 } 1446 break; 1447 case Op_AbsVB: 1448 case Op_AbsVS: 1449 case Op_AbsVI: 1450 case Op_AddReductionVI: 1451 case Op_AndReductionV: 1452 case 
Op_OrReductionV: 1453 case Op_XorReductionV: 1454 if (UseSSE < 3) { // requires at least SSSE3 1455 return false; 1456 } 1457 break; 1458 case Op_VectorLoadShuffle: 1459 case Op_VectorRearrange: 1460 case Op_MulReductionVI: 1461 if (UseSSE < 4) { // requires at least SSE4 1462 return false; 1463 } 1464 break; 1465 case Op_SqrtVD: 1466 case Op_SqrtVF: 1467 case Op_VectorMaskCmp: 1468 case Op_VectorCastB2X: 1469 case Op_VectorCastS2X: 1470 case Op_VectorCastI2X: 1471 case Op_VectorCastL2X: 1472 case Op_VectorCastF2X: 1473 case Op_VectorCastD2X: 1474 case Op_VectorUCastB2X: 1475 case Op_VectorUCastS2X: 1476 case Op_VectorUCastI2X: 1477 if (UseAVX < 1) { // enabled for AVX only 1478 return false; 1479 } 1480 break; 1481 case Op_RoundVF: 1482 if (UseAVX < 2) { // enabled for AVX2 only 1483 return false; 1484 } 1485 break; 1486 case Op_RoundVD: 1487 if (UseAVX < 3) { 1488 return false; // enabled for AVX3 only 1489 } 1490 break; 1491 case Op_CompareAndSwapL: 1492 #ifdef _LP64 1493 case Op_CompareAndSwapP: 1494 #endif 1495 if (!VM_Version::supports_cx8()) { 1496 return false; 1497 } 1498 break; 1499 case Op_CMoveVF: 1500 case Op_CMoveVD: 1501 if (UseAVX < 1) { // enabled for AVX only 1502 return false; 1503 } 1504 break; 1505 case Op_StrIndexOf: 1506 if (!UseSSE42Intrinsics) { 1507 return false; 1508 } 1509 break; 1510 case Op_StrIndexOfChar: 1511 if (!UseSSE42Intrinsics) { 1512 return false; 1513 } 1514 break; 1515 case Op_OnSpinWait: 1516 if (VM_Version::supports_on_spin_wait() == false) { 1517 return false; 1518 } 1519 break; 1520 case Op_MulVB: 1521 case Op_LShiftVB: 1522 case Op_RShiftVB: 1523 case Op_URShiftVB: 1524 case Op_VectorInsert: 1525 case Op_VectorLoadMask: 1526 case Op_VectorStoreMask: 1527 case Op_VectorBlend: 1528 if (UseSSE < 4) { 1529 return false; 1530 } 1531 break; 1532 #ifdef _LP64 1533 case Op_MaxD: 1534 case Op_MaxF: 1535 case Op_MinD: 1536 case Op_MinF: 1537 if (UseAVX < 1) { // enabled for AVX only 1538 return false; 1539 } 1540 break; 1541 #endif 1542 case Op_CacheWB: 1543 case Op_CacheWBPreSync: 1544 case Op_CacheWBPostSync: 1545 if (!VM_Version::supports_data_cache_line_flush()) { 1546 return false; 1547 } 1548 break; 1549 case Op_ExtractB: 1550 case Op_ExtractL: 1551 case Op_ExtractI: 1552 case Op_RoundDoubleMode: 1553 if (UseSSE < 4) { 1554 return false; 1555 } 1556 break; 1557 case Op_RoundDoubleModeV: 1558 if (VM_Version::supports_avx() == false) { 1559 return false; // 128bit vroundpd is not available 1560 } 1561 break; 1562 case Op_LoadVectorGather: 1563 if (UseAVX < 2) { 1564 return false; 1565 } 1566 break; 1567 case Op_FmaVD: 1568 case Op_FmaVF: 1569 if (!UseFMA) { 1570 return false; 1571 } 1572 break; 1573 case Op_MacroLogicV: 1574 if (UseAVX < 3 || !UseVectorMacroLogic) { 1575 return false; 1576 } 1577 break; 1578 1579 case Op_VectorCmpMasked: 1580 case Op_VectorMaskGen: 1581 case Op_LoadVectorMasked: 1582 case Op_StoreVectorMasked: 1583 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1584 return false; 1585 } 1586 break; 1587 case Op_VectorMaskFirstTrue: 1588 case Op_VectorMaskLastTrue: 1589 case Op_VectorMaskTrueCount: 1590 case Op_VectorMaskToLong: 1591 if (!is_LP64 || UseAVX < 1) { 1592 return false; 1593 } 1594 break; 1595 case Op_RoundF: 1596 case Op_RoundD: 1597 if (!is_LP64) { 1598 return false; 1599 } 1600 break; 1601 case Op_CopySignD: 1602 case Op_CopySignF: 1603 if (UseAVX < 3 || !is_LP64) { 1604 return false; 1605 } 1606 if (!VM_Version::supports_avx512vl()) { 1607 return false; 1608 } 1609 break; 1610 #ifndef _LP64 1611 case 
Op_AddReductionVF: 1612 case Op_AddReductionVD: 1613 case Op_MulReductionVF: 1614 case Op_MulReductionVD: 1615 if (UseSSE < 1) { // requires at least SSE 1616 return false; 1617 } 1618 break; 1619 case Op_MulAddVS2VI: 1620 case Op_RShiftVL: 1621 case Op_AbsVD: 1622 case Op_NegVD: 1623 if (UseSSE < 2) { 1624 return false; 1625 } 1626 break; 1627 #endif // !LP64 1628 case Op_SignumF: 1629 if (UseSSE < 1) { 1630 return false; 1631 } 1632 break; 1633 case Op_SignumD: 1634 if (UseSSE < 2) { 1635 return false; 1636 } 1637 break; 1638 case Op_CompressM: 1639 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1640 return false; 1641 } 1642 break; 1643 case Op_CompressV: 1644 case Op_ExpandV: 1645 if (!VM_Version::supports_avx512vl()) { 1646 return false; 1647 } 1648 break; 1649 case Op_SqrtF: 1650 if (UseSSE < 1) { 1651 return false; 1652 } 1653 break; 1654 case Op_SqrtD: 1655 #ifdef _LP64 1656 if (UseSSE < 2) { 1657 return false; 1658 } 1659 #else 1660 // x86_32.ad has a special match rule for SqrtD. 1661 // Together with common x86 rules, this handles all UseSSE cases. 1662 #endif 1663 break; 1664 } 1665 return true; // Match rules are supported by default. 1666 } 1667 1668 //------------------------------------------------------------------------ 1669 1670 static inline bool is_pop_count_instr_target(BasicType bt) { 1671 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1672 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1673 } 1674 1675 // Identify extra cases that we might want to provide match rules for vector nodes and 1676 // other intrinsics guarded with vector length (vlen) and element type (bt). 1677 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1678 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1679 if (!match_rule_supported(opcode)) { 1680 return false; 1681 } 1682 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1683 // * SSE2 supports 128bit vectors for all types; 1684 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1685 // * AVX2 supports 256bit vectors for all types; 1686 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1687 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1688 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1689 // And MaxVectorSize is taken into account as well. 
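// Worked example of the restrictions above (illustrative): with AVX2 a 256bit
// BYTE vector (vlen == 32) is accepted, with only AVX1 it is rejected because
// 256bit vectors are limited to FLOAT/DOUBLE there, and a 512bit BYTE vector
// additionally requires AVX512BW on top of AVX512F.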
1690 if (!vector_size_supported(bt, vlen)) { 1691 return false; 1692 } 1693 // Special cases which require vector length follow: 1694 // * implementation limitations 1695 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1696 // * 128bit vroundpd instruction is present only in AVX1 1697 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1698 switch (opcode) { 1699 case Op_AbsVF: 1700 case Op_NegVF: 1701 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1702 return false; // 512bit vandps and vxorps are not available 1703 } 1704 break; 1705 case Op_AbsVD: 1706 case Op_NegVD: 1707 case Op_MulVL: 1708 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1709 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1710 } 1711 break; 1712 case Op_CMoveVF: 1713 if (vlen != 8) { 1714 return false; // implementation limitation (only vcmov8F_reg is present) 1715 } 1716 break; 1717 case Op_RotateRightV: 1718 case Op_RotateLeftV: 1719 if (bt != T_INT && bt != T_LONG) { 1720 return false; 1721 } // fallthrough 1722 case Op_MacroLogicV: 1723 if (!VM_Version::supports_evex() || 1724 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1725 return false; 1726 } 1727 break; 1728 case Op_ClearArray: 1729 case Op_VectorMaskGen: 1730 case Op_VectorCmpMasked: 1731 case Op_LoadVectorMasked: 1732 case Op_StoreVectorMasked: 1733 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1734 return false; 1735 } 1736 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1737 return false; 1738 } 1739 break; 1740 case Op_CMoveVD: 1741 if (vlen != 4) { 1742 return false; // implementation limitation (only vcmov4D_reg is present) 1743 } 1744 break; 1745 case Op_MaxV: 1746 case Op_MinV: 1747 if (UseSSE < 4 && is_integral_type(bt)) { 1748 return false; 1749 } 1750 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1751 // Float/Double intrinsics are enabled for AVX family currently. 1752 if (UseAVX == 0) { 1753 return false; 1754 } 1755 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1756 return false; 1757 } 1758 } 1759 break; 1760 case Op_CallLeafVector: 1761 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1762 return false; 1763 } 1764 break; 1765 case Op_AddReductionVI: 1766 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1767 return false; 1768 } 1769 // fallthrough 1770 case Op_AndReductionV: 1771 case Op_OrReductionV: 1772 case Op_XorReductionV: 1773 if (is_subword_type(bt) && (UseSSE < 4)) { 1774 return false; 1775 } 1776 #ifndef _LP64 1777 if (bt == T_BYTE || bt == T_LONG) { 1778 return false; 1779 } 1780 #endif 1781 break; 1782 #ifndef _LP64 1783 case Op_VectorInsert: 1784 if (bt == T_LONG || bt == T_DOUBLE) { 1785 return false; 1786 } 1787 break; 1788 #endif 1789 case Op_MinReductionV: 1790 case Op_MaxReductionV: 1791 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1792 return false; 1793 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1794 return false; 1795 } 1796 // Float/Double intrinsics enabled for AVX family. 
1797 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1798 return false; 1799 } 1800 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1801 return false; 1802 } 1803 #ifndef _LP64 1804 if (bt == T_BYTE || bt == T_LONG) { 1805 return false; 1806 } 1807 #endif 1808 break; 1809 case Op_VectorTest: 1810 if (UseSSE < 4) { 1811 return false; // Implementation limitation 1812 } else if (size_in_bits < 32) { 1813 return false; // Implementation limitation 1814 } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { 1815 return false; // Implementation limitation 1816 } 1817 break; 1818 case Op_VectorLoadShuffle: 1819 case Op_VectorRearrange: 1820 if(vlen == 2) { 1821 return false; // Implementation limitation due to how shuffle is loaded 1822 } else if (size_in_bits == 256 && UseAVX < 2) { 1823 return false; // Implementation limitation 1824 } else if (bt == T_BYTE && size_in_bits > 256 && !VM_Version::supports_avx512_vbmi()) { 1825 return false; // Implementation limitation 1826 } else if (bt == T_SHORT && size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1827 return false; // Implementation limitation 1828 } 1829 break; 1830 case Op_VectorLoadMask: 1831 if (size_in_bits == 256 && UseAVX < 2) { 1832 return false; // Implementation limitation 1833 } 1834 // fallthrough 1835 case Op_VectorStoreMask: 1836 if (vlen == 2) { 1837 return false; // Implementation limitation 1838 } 1839 break; 1840 case Op_VectorCastB2X: 1841 case Op_VectorCastS2X: 1842 case Op_VectorCastI2X: 1843 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1844 return false; 1845 } 1846 break; 1847 case Op_VectorCastL2X: 1848 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1849 return false; 1850 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1851 return false; 1852 } 1853 break; 1854 case Op_VectorCastD2X: 1855 if (is_subword_type(bt) || bt == T_INT) { 1856 return false; 1857 } 1858 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1859 return false; 1860 } 1861 break; 1862 case Op_RoundVD: 1863 if (!VM_Version::supports_avx512dq()) { 1864 return false; 1865 } 1866 break; 1867 case Op_VectorCastF2X: 1868 if (is_subword_type(bt) || bt == T_LONG) { 1869 return false; 1870 } 1871 break; 1872 case Op_MulReductionVI: 1873 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1874 return false; 1875 } 1876 break; 1877 case Op_LoadVectorGatherMasked: 1878 case Op_StoreVectorScatterMasked: 1879 case Op_StoreVectorScatter: 1880 if (is_subword_type(bt)) { 1881 return false; 1882 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1883 return false; 1884 } 1885 // fallthrough 1886 case Op_LoadVectorGather: 1887 if (size_in_bits == 64 ) { 1888 return false; 1889 } 1890 break; 1891 case Op_MaskAll: 1892 if (!VM_Version::supports_evex()) { 1893 return false; 1894 } 1895 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1896 return false; 1897 } 1898 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1899 return false; 1900 } 1901 break; 1902 case Op_VectorMaskCmp: 1903 if (vlen < 2 || size_in_bits < 32) { 1904 return false; 1905 } 1906 break; 1907 case Op_CompressM: 1908 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1909 return false; 1910 } 1911 break; 1912 case Op_CompressV: 1913 case Op_ExpandV: 1914 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1915 return false; 1916 } 1917 if (size_in_bits < 128 ) { 1918 return 
false; 1919 } 1920 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1921 return false; 1922 } 1923 break; 1924 case Op_VectorLongToMask: 1925 if (UseAVX < 1 || !is_LP64) { 1926 return false; 1927 } 1928 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1929 return false; 1930 } 1931 break; 1932 case Op_PopCountVI: 1933 case Op_PopCountVL: { 1934 if (!is_pop_count_instr_target(bt) && 1935 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1936 return false; 1937 } 1938 } 1939 break; 1940 case Op_ReverseV: 1941 case Op_ReverseBytesV: 1942 if (UseAVX < 2) { 1943 return false; 1944 } 1945 break; 1946 case Op_CountTrailingZerosV: 1947 case Op_CountLeadingZerosV: 1948 if (UseAVX < 2) { 1949 return false; 1950 } 1951 break; 1952 } 1953 return true; // Per default match rules are supported. 1954 } 1955 1956 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1957 // ADLC based match_rule_supported routine checks for the existence of pattern based 1958 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1959 // of their non-masked counterpart with mask edge being the differentiator. 1960 // This routine does a strict check on the existence of masked operation patterns 1961 // by returning a default false value for all the other opcodes apart from the 1962 // ones whose masked instruction patterns are defined in this file. 1963 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1964 return false; 1965 } 1966 1967 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1968 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1969 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1970 return false; 1971 } 1972 switch(opcode) { 1973 // Unary masked operations 1974 case Op_AbsVB: 1975 case Op_AbsVS: 1976 if(!VM_Version::supports_avx512bw()) { 1977 return false; // Implementation limitation 1978 } 1979 case Op_AbsVI: 1980 case Op_AbsVL: 1981 return true; 1982 1983 // Ternary masked operations 1984 case Op_FmaVF: 1985 case Op_FmaVD: 1986 return true; 1987 1988 case Op_MacroLogicV: 1989 if(bt != T_INT && bt != T_LONG) { 1990 return false; 1991 } 1992 return true; 1993 1994 // Binary masked operations 1995 case Op_AddVB: 1996 case Op_AddVS: 1997 case Op_SubVB: 1998 case Op_SubVS: 1999 case Op_MulVS: 2000 case Op_LShiftVS: 2001 case Op_RShiftVS: 2002 case Op_URShiftVS: 2003 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2004 if (!VM_Version::supports_avx512bw()) { 2005 return false; // Implementation limitation 2006 } 2007 return true; 2008 2009 case Op_MulVL: 2010 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2011 if (!VM_Version::supports_avx512dq()) { 2012 return false; // Implementation limitation 2013 } 2014 return true; 2015 2016 case Op_AndV: 2017 case Op_OrV: 2018 case Op_XorV: 2019 case Op_RotateRightV: 2020 case Op_RotateLeftV: 2021 if (bt != T_INT && bt != T_LONG) { 2022 return false; // Implementation limitation 2023 } 2024 return true; 2025 2026 case Op_VectorLoadMask: 2027 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2028 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2029 return false; 2030 } 2031 return true; 2032 2033 case Op_AddVI: 2034 case Op_AddVL: 2035 case Op_AddVF: 2036 case Op_AddVD: 2037 case Op_SubVI: 2038 case Op_SubVL: 2039 case Op_SubVF: 2040 case Op_SubVD: 2041 case Op_MulVI: 2042 case Op_MulVF: 2043 case Op_MulVD: 2044 case Op_DivVF: 2045 case Op_DivVD: 2046 case Op_SqrtVF: 
2047 case Op_SqrtVD: 2048 case Op_LShiftVI: 2049 case Op_LShiftVL: 2050 case Op_RShiftVI: 2051 case Op_RShiftVL: 2052 case Op_URShiftVI: 2053 case Op_URShiftVL: 2054 case Op_LoadVectorMasked: 2055 case Op_StoreVectorMasked: 2056 case Op_LoadVectorGatherMasked: 2057 case Op_StoreVectorScatterMasked: 2058 return true; 2059 2060 case Op_MaxV: 2061 case Op_MinV: 2062 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2063 return false; // Implementation limitation 2064 } 2065 if (is_floating_point_type(bt)) { 2066 return false; // Implementation limitation 2067 } 2068 return true; 2069 2070 case Op_VectorMaskCmp: 2071 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2072 return false; // Implementation limitation 2073 } 2074 return true; 2075 2076 case Op_VectorRearrange: 2077 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2078 return false; // Implementation limitation 2079 } 2080 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2081 return false; // Implementation limitation 2082 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2083 return false; // Implementation limitation 2084 } 2085 return true; 2086 2087 // Binary Logical operations 2088 case Op_AndVMask: 2089 case Op_OrVMask: 2090 case Op_XorVMask: 2091 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2092 return false; // Implementation limitation 2093 } 2094 return true; 2095 2096 case Op_PopCountVI: 2097 case Op_PopCountVL: 2098 if (!is_pop_count_instr_target(bt)) { 2099 return false; 2100 } 2101 return true; 2102 2103 case Op_MaskAll: 2104 return true; 2105 2106 case Op_CountLeadingZerosV: 2107 if ((bt == T_INT || bt == T_LONG) && VM_Version::supports_avx512cd()) { 2108 return true; 2109 } 2110 default: 2111 return false; 2112 } 2113 } 2114 2115 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2116 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2117 bool legacy = (generic_opnd->opcode() == LEGVEC); 2118 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2119 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2120 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 
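// (Illustrative rationale: legacy, non-EVEX encodings can only address XMM0-15,
// so on such CPUs the temp is kept in that range via legVecZ.)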
2121 return new legVecZOper(); 2122 } 2123 if (legacy) { 2124 switch (ideal_reg) { 2125 case Op_VecS: return new legVecSOper(); 2126 case Op_VecD: return new legVecDOper(); 2127 case Op_VecX: return new legVecXOper(); 2128 case Op_VecY: return new legVecYOper(); 2129 case Op_VecZ: return new legVecZOper(); 2130 } 2131 } else { 2132 switch (ideal_reg) { 2133 case Op_VecS: return new vecSOper(); 2134 case Op_VecD: return new vecDOper(); 2135 case Op_VecX: return new vecXOper(); 2136 case Op_VecY: return new vecYOper(); 2137 case Op_VecZ: return new vecZOper(); 2138 } 2139 } 2140 ShouldNotReachHere(); 2141 return NULL; 2142 } 2143 2144 bool Matcher::is_reg2reg_move(MachNode* m) { 2145 switch (m->rule()) { 2146 case MoveVec2Leg_rule: 2147 case MoveLeg2Vec_rule: 2148 case MoveF2VL_rule: 2149 case MoveF2LEG_rule: 2150 case MoveVL2F_rule: 2151 case MoveLEG2F_rule: 2152 case MoveD2VL_rule: 2153 case MoveD2LEG_rule: 2154 case MoveVL2D_rule: 2155 case MoveLEG2D_rule: 2156 return true; 2157 default: 2158 return false; 2159 } 2160 } 2161 2162 bool Matcher::is_generic_vector(MachOper* opnd) { 2163 switch (opnd->opcode()) { 2164 case VEC: 2165 case LEGVEC: 2166 return true; 2167 default: 2168 return false; 2169 } 2170 } 2171 2172 //------------------------------------------------------------------------ 2173 2174 const RegMask* Matcher::predicate_reg_mask(void) { 2175 return &_VECTMASK_REG_mask; 2176 } 2177 2178 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2179 return new TypeVectMask(elemTy, length); 2180 } 2181 2182 // Max vector size in bytes. 0 if not supported. 2183 const int Matcher::vector_width_in_bytes(BasicType bt) { 2184 assert(is_java_primitive(bt), "only primitive type vectors"); 2185 if (UseSSE < 2) return 0; 2186 // SSE2 supports 128bit vectors for all types. 2187 // AVX2 supports 256bit vectors for all types. 2188 // AVX2/EVEX supports 512bit vectors for all types. 2189 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2190 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2191 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2192 size = (UseAVX > 2) ? 64 : 32; 2193 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2194 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2195 // Use flag to limit vector size. 2196 size = MIN2(size,(int)MaxVectorSize); 2197 // Minimum 2 values in vector (or 4 for bytes). 2198 switch (bt) { 2199 case T_DOUBLE: 2200 case T_LONG: 2201 if (size < 16) return 0; 2202 break; 2203 case T_FLOAT: 2204 case T_INT: 2205 if (size < 8) return 0; 2206 break; 2207 case T_BOOLEAN: 2208 if (size < 4) return 0; 2209 break; 2210 case T_CHAR: 2211 if (size < 4) return 0; 2212 break; 2213 case T_BYTE: 2214 if (size < 4) return 0; 2215 break; 2216 case T_SHORT: 2217 if (size < 4) return 0; 2218 break; 2219 default: 2220 ShouldNotReachHere(); 2221 } 2222 return size; 2223 } 2224 2225 // Limits on vector size (number of elements) loaded into vector. 2226 const int Matcher::max_vector_size(const BasicType bt) { 2227 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2228 } 2229 const int Matcher::min_vector_size(const BasicType bt) { 2230 int max_size = max_vector_size(bt); 2231 // Min size which can be loaded into vector is 4 bytes. 2232 int size = (type2aelembytes(bt) == 1) ? 
4 : 2; 2233 // Support for calling svml double64 vectors 2234 if (bt == T_DOUBLE) { 2235 size = 1; 2236 } 2237 return MIN2(size,max_size); 2238 } 2239 2240 const int Matcher::scalable_vector_reg_size(const BasicType bt) { 2241 return -1; 2242 } 2243 2244 // Vector ideal reg corresponding to specified size in bytes 2245 const uint Matcher::vector_ideal_reg(int size) { 2246 assert(MaxVectorSize >= size, ""); 2247 switch(size) { 2248 case 4: return Op_VecS; 2249 case 8: return Op_VecD; 2250 case 16: return Op_VecX; 2251 case 32: return Op_VecY; 2252 case 64: return Op_VecZ; 2253 } 2254 ShouldNotReachHere(); 2255 return 0; 2256 } 2257 2258 // Check for shift by small constant as well 2259 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2260 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2261 shift->in(2)->get_int() <= 3 && 2262 // Are there other uses besides address expressions? 2263 !matcher->is_visited(shift)) { 2264 address_visited.set(shift->_idx); // Flag as address_visited 2265 mstack.push(shift->in(2), Matcher::Visit); 2266 Node *conv = shift->in(1); 2267 #ifdef _LP64 2268 // Allow Matcher to match the rule which bypass 2269 // ConvI2L operation for an array index on LP64 2270 // if the index value is positive. 2271 if (conv->Opcode() == Op_ConvI2L && 2272 conv->as_Type()->type()->is_long()->_lo >= 0 && 2273 // Are there other uses besides address expressions? 2274 !matcher->is_visited(conv)) { 2275 address_visited.set(conv->_idx); // Flag as address_visited 2276 mstack.push(conv->in(1), Matcher::Pre_Visit); 2277 } else 2278 #endif 2279 mstack.push(conv, Matcher::Pre_Visit); 2280 return true; 2281 } 2282 return false; 2283 } 2284 2285 // This function identifies sub-graphs in which a 'load' node is 2286 // input to two different nodes, and such that it can be matched 2287 // with BMI instructions like blsi, blsr, etc. 2288 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2289 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2290 // refers to the same node. 2291 // 2292 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2293 // This is a temporary solution until we make DAGs expressible in ADL. 
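// Illustrative use, mirroring is_bmi_pattern() below (and_node/load_node are
// placeholder names):
//   FusedPatternMatcher<TypeInt> bmii(and_node, load_node, Op_ConI);
//   bmii.match(Op_AndI, -1, Op_SubI, 1, 0);
// i.e. AndI is commutative (index -1) and SubI has its constant 0 in input 1,
// which recognizes the (AndI (SubI 0 load) load) shape for blsi.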
2294 template<typename ConType> 2295 class FusedPatternMatcher { 2296 Node* _op1_node; 2297 Node* _mop_node; 2298 int _con_op; 2299 2300 static int match_next(Node* n, int next_op, int next_op_idx) { 2301 if (n->in(1) == NULL || n->in(2) == NULL) { 2302 return -1; 2303 } 2304 2305 if (next_op_idx == -1) { // n is commutative, try rotations 2306 if (n->in(1)->Opcode() == next_op) { 2307 return 1; 2308 } else if (n->in(2)->Opcode() == next_op) { 2309 return 2; 2310 } 2311 } else { 2312 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2313 if (n->in(next_op_idx)->Opcode() == next_op) { 2314 return next_op_idx; 2315 } 2316 } 2317 return -1; 2318 } 2319 2320 public: 2321 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2322 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2323 2324 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2325 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2326 typename ConType::NativeType con_value) { 2327 if (_op1_node->Opcode() != op1) { 2328 return false; 2329 } 2330 if (_mop_node->outcnt() > 2) { 2331 return false; 2332 } 2333 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2334 if (op1_op2_idx == -1) { 2335 return false; 2336 } 2337 // Memory operation must be the other edge 2338 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2339 2340 // Check that the mop node is really what we want 2341 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2342 Node* op2_node = _op1_node->in(op1_op2_idx); 2343 if (op2_node->outcnt() > 1) { 2344 return false; 2345 } 2346 assert(op2_node->Opcode() == op2, "Should be"); 2347 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2348 if (op2_con_idx == -1) { 2349 return false; 2350 } 2351 // Memory operation must be the other edge 2352 int op2_mop_idx = (op2_con_idx & 1) + 1; 2353 // Check that the memory operation is the same node 2354 if (op2_node->in(op2_mop_idx) == _mop_node) { 2355 // Now check the constant 2356 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2357 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2358 return true; 2359 } 2360 } 2361 } 2362 return false; 2363 } 2364 }; 2365 2366 static bool is_bmi_pattern(Node* n, Node* m) { 2367 assert(UseBMI1Instructions, "sanity"); 2368 if (n != NULL && m != NULL) { 2369 if (m->Opcode() == Op_LoadI) { 2370 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2371 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2372 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2373 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2374 } else if (m->Opcode() == Op_LoadL) { 2375 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2376 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2377 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2378 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2379 } 2380 } 2381 return false; 2382 } 2383 2384 // Should the matcher clone input 'm' of node 'n'? 2385 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2386 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
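// (Cloning the shared load lets it be folded as the memory operand of the fused
// BMI form, e.g. blsi r32, m32 as described above; this is an interpretation of
// the intent -- is_bmi_pattern() defines the exact shapes accepted.)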
2387 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2388 mstack.push(m, Visit); 2389 return true; 2390 } 2391 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2392 mstack.push(m, Visit); // m = ShiftCntV 2393 return true; 2394 } 2395 return false; 2396 } 2397 2398 // Should the Matcher clone shifts on addressing modes, expecting them 2399 // to be subsumed into complex addressing expressions or compute them 2400 // into registers? 2401 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2402 Node *off = m->in(AddPNode::Offset); 2403 if (off->is_Con()) { 2404 address_visited.test_set(m->_idx); // Flag as address_visited 2405 Node *adr = m->in(AddPNode::Address); 2406 2407 // Intel can handle 2 adds in addressing mode 2408 // AtomicAdd is not an addressing expression. 2409 // Cheap to find it by looking for screwy base. 2410 if (adr->is_AddP() && 2411 !adr->in(AddPNode::Base)->is_top() && 2412 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2413 // Are there other uses besides address expressions? 2414 !is_visited(adr)) { 2415 address_visited.set(adr->_idx); // Flag as address_visited 2416 Node *shift = adr->in(AddPNode::Offset); 2417 if (!clone_shift(shift, this, mstack, address_visited)) { 2418 mstack.push(shift, Pre_Visit); 2419 } 2420 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2421 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2422 } else { 2423 mstack.push(adr, Pre_Visit); 2424 } 2425 2426 // Clone X+offset as it also folds into most addressing expressions 2427 mstack.push(off, Visit); 2428 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2429 return true; 2430 } else if (clone_shift(off, this, mstack, address_visited)) { 2431 address_visited.test_set(m->_idx); // Flag as address_visited 2432 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2433 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2434 return true; 2435 } 2436 return false; 2437 } 2438 2439 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2440 switch (bt) { 2441 case BoolTest::eq: 2442 return Assembler::eq; 2443 case BoolTest::ne: 2444 return Assembler::neq; 2445 case BoolTest::le: 2446 case BoolTest::ule: 2447 return Assembler::le; 2448 case BoolTest::ge: 2449 case BoolTest::uge: 2450 return Assembler::nlt; 2451 case BoolTest::lt: 2452 case BoolTest::ult: 2453 return Assembler::lt; 2454 case BoolTest::gt: 2455 case BoolTest::ugt: 2456 return Assembler::nle; 2457 default : ShouldNotReachHere(); return Assembler::_false; 2458 } 2459 } 2460 2461 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2462 switch (bt) { 2463 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2464 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2465 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2466 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2467 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2468 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2469 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2470 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2471 } 2472 } 2473 2474 // Helper methods for MachSpillCopyNode::implementation(). 
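// vec_mov_helper() below copies between vector registers (movdqu/vmovdqu/
// evmovdquq by ideal reg), while vec_spill_helper() moves a vector to or from a
// stack slot (movdl/movq/movdqu/vmovdqu/evmovdquq). On AVX-512 CPUs without
// AVX512VL the XMM/YMM cases use vextractf32x4/64x4 and vinsertf32x4/64x4
// instead, as the code below shows.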
2475 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2476 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2477 assert(ireg == Op_VecS || // 32bit vector 2478 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2479 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2480 "no non-adjacent vector moves" ); 2481 if (cbuf) { 2482 C2_MacroAssembler _masm(cbuf); 2483 switch (ireg) { 2484 case Op_VecS: // copy whole register 2485 case Op_VecD: 2486 case Op_VecX: 2487 #ifndef _LP64 2488 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2489 #else 2490 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2491 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2492 } else { 2493 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2494 } 2495 #endif 2496 break; 2497 case Op_VecY: 2498 #ifndef _LP64 2499 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2500 #else 2501 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2502 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2503 } else { 2504 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2505 } 2506 #endif 2507 break; 2508 case Op_VecZ: 2509 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2510 break; 2511 default: 2512 ShouldNotReachHere(); 2513 } 2514 #ifndef PRODUCT 2515 } else { 2516 switch (ireg) { 2517 case Op_VecS: 2518 case Op_VecD: 2519 case Op_VecX: 2520 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2521 break; 2522 case Op_VecY: 2523 case Op_VecZ: 2524 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2525 break; 2526 default: 2527 ShouldNotReachHere(); 2528 } 2529 #endif 2530 } 2531 } 2532 2533 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2534 int stack_offset, int reg, uint ireg, outputStream* st) { 2535 if (cbuf) { 2536 C2_MacroAssembler _masm(cbuf); 2537 if (is_load) { 2538 switch (ireg) { 2539 case Op_VecS: 2540 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2541 break; 2542 case Op_VecD: 2543 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2544 break; 2545 case Op_VecX: 2546 #ifndef _LP64 2547 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2548 #else 2549 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2550 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2551 } else { 2552 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2553 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2554 } 2555 #endif 2556 break; 2557 case Op_VecY: 2558 #ifndef _LP64 2559 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2560 #else 2561 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2562 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2563 } else { 2564 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2565 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2566 } 2567 #endif 2568 break; 2569 case Op_VecZ: 2570 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2571 break; 2572 default: 2573 ShouldNotReachHere(); 2574 } 2575 } else { // store 2576 switch (ireg) { 2577 case Op_VecS: 2578 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2579 break; 2580 case Op_VecD: 2581 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2582 break; 2583 case Op_VecX: 2584 #ifndef _LP64 2585 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2586 #else 2587 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2588 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2589 } 2590 else { 2591 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2592 } 2593 #endif 2594 break; 2595 case Op_VecY: 2596 #ifndef _LP64 2597 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2598 #else 2599 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2600 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2601 } 2602 else { 2603 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2604 } 2605 #endif 2606 break; 2607 case Op_VecZ: 2608 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2609 break; 2610 default: 2611 ShouldNotReachHere(); 2612 } 2613 } 2614 #ifndef PRODUCT 2615 } else { 2616 if (is_load) { 2617 switch (ireg) { 2618 case Op_VecS: 2619 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2620 break; 2621 case Op_VecD: 2622 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2623 break; 2624 case Op_VecX: 2625 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2626 break; 2627 case Op_VecY: 2628 case Op_VecZ: 2629 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2630 break; 2631 default: 2632 ShouldNotReachHere(); 2633 } 2634 } else { // store 2635 switch (ireg) { 2636 case Op_VecS: 2637 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2638 break; 2639 case Op_VecD: 2640 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2641 break; 2642 case Op_VecX: 2643 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2644 break; 2645 case Op_VecY: 2646 case Op_VecZ: 2647 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2648 break; 2649 default: 2650 ShouldNotReachHere(); 2651 } 2652 } 2653 #endif 2654 } 2655 } 2656 2657 template <class T> 2658 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2659 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2660 jvalue ele; 2661 switch (bt) { 2662 case T_BYTE: ele.b = con; break; 2663 case T_SHORT: ele.s = con; break; 2664 case T_INT: ele.i = con; break; 2665 case T_LONG: ele.j = con; break; 2666 case T_FLOAT: ele.f = con; break; 2667 case T_DOUBLE: ele.d = con; break; 2668 default: ShouldNotReachHere(); 2669 } 2670 for (int i = 0; i < len; i++) { 2671 val->append(ele); 2672 } 2673 return val; 2674 } 2675 2676 static inline jlong high_bit_set(BasicType bt) { 2677 switch (bt) { 2678 case T_BYTE: 
return 0x8080808080808080; 2679 case T_SHORT: return 0x8000800080008000; 2680 case T_INT: return 0x8000000080000000; 2681 case T_LONG: return 0x8000000000000000; 2682 default: 2683 ShouldNotReachHere(); 2684 return 0; 2685 } 2686 } 2687 2688 #ifndef PRODUCT 2689 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2690 st->print("nop \t# %d bytes pad for loops and calls", _count); 2691 } 2692 #endif 2693 2694 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2695 C2_MacroAssembler _masm(&cbuf); 2696 __ nop(_count); 2697 } 2698 2699 uint MachNopNode::size(PhaseRegAlloc*) const { 2700 return _count; 2701 } 2702 2703 #ifndef PRODUCT 2704 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2705 st->print("# breakpoint"); 2706 } 2707 #endif 2708 2709 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2710 C2_MacroAssembler _masm(&cbuf); 2711 __ int3(); 2712 } 2713 2714 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2715 return MachNode::size(ra_); 2716 } 2717 2718 %} 2719 2720 encode %{ 2721 2722 enc_class call_epilog %{ 2723 if (VerifyStackAtCalls) { 2724 // Check that stack depth is unchanged: find majik cookie on stack 2725 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2726 C2_MacroAssembler _masm(&cbuf); 2727 Label L; 2728 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2729 __ jccb(Assembler::equal, L); 2730 // Die if stack mismatch 2731 __ int3(); 2732 __ bind(L); 2733 } 2734 %} 2735 2736 %} 2737 2738 // Operands for bound floating pointer register arguments 2739 operand rxmm0() %{ 2740 constraint(ALLOC_IN_RC(xmm0_reg)); 2741 match(VecX); 2742 format%{%} 2743 interface(REG_INTER); 2744 %} 2745 2746 //----------OPERANDS----------------------------------------------------------- 2747 // Operand definitions must precede instruction definitions for correct parsing 2748 // in the ADLC because operands constitute user defined types which are used in 2749 // instruction definitions. 2750 2751 // Vectors 2752 2753 // Dummy generic vector class. Should be used for all vector operands. 2754 // Replaced with vec[SDXYZ] during post-selection pass. 2755 operand vec() %{ 2756 constraint(ALLOC_IN_RC(dynamic)); 2757 match(VecX); 2758 match(VecY); 2759 match(VecZ); 2760 match(VecS); 2761 match(VecD); 2762 2763 format %{ %} 2764 interface(REG_INTER); 2765 %} 2766 2767 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2768 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2769 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2770 // runtime code generation via reg_class_dynamic. 2771 operand legVec() %{ 2772 constraint(ALLOC_IN_RC(dynamic)); 2773 match(VecX); 2774 match(VecY); 2775 match(VecZ); 2776 match(VecS); 2777 match(VecD); 2778 2779 format %{ %} 2780 interface(REG_INTER); 2781 %} 2782 2783 // Replaces vec during post-selection cleanup. See above. 2784 operand vecS() %{ 2785 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2786 match(VecS); 2787 2788 format %{ %} 2789 interface(REG_INTER); 2790 %} 2791 2792 // Replaces legVec during post-selection cleanup. See above. 2793 operand legVecS() %{ 2794 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2795 match(VecS); 2796 2797 format %{ %} 2798 interface(REG_INTER); 2799 %} 2800 2801 // Replaces vec during post-selection cleanup. See above. 
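// (The actual replacement of vec/legVec operands is done by
// Matcher::pd_specialize_generic_vector_operand(), defined earlier in this file.)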
2802 operand vecD() %{ 2803 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2804 match(VecD); 2805 2806 format %{ %} 2807 interface(REG_INTER); 2808 %} 2809 2810 // Replaces legVec during post-selection cleanup. See above. 2811 operand legVecD() %{ 2812 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2813 match(VecD); 2814 2815 format %{ %} 2816 interface(REG_INTER); 2817 %} 2818 2819 // Replaces vec during post-selection cleanup. See above. 2820 operand vecX() %{ 2821 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2822 match(VecX); 2823 2824 format %{ %} 2825 interface(REG_INTER); 2826 %} 2827 2828 // Replaces legVec during post-selection cleanup. See above. 2829 operand legVecX() %{ 2830 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2831 match(VecX); 2832 2833 format %{ %} 2834 interface(REG_INTER); 2835 %} 2836 2837 // Replaces vec during post-selection cleanup. See above. 2838 operand vecY() %{ 2839 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2840 match(VecY); 2841 2842 format %{ %} 2843 interface(REG_INTER); 2844 %} 2845 2846 // Replaces legVec during post-selection cleanup. See above. 2847 operand legVecY() %{ 2848 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2849 match(VecY); 2850 2851 format %{ %} 2852 interface(REG_INTER); 2853 %} 2854 2855 // Replaces vec during post-selection cleanup. See above. 2856 operand vecZ() %{ 2857 constraint(ALLOC_IN_RC(vectorz_reg)); 2858 match(VecZ); 2859 2860 format %{ %} 2861 interface(REG_INTER); 2862 %} 2863 2864 // Replaces legVec during post-selection cleanup. See above. 2865 operand legVecZ() %{ 2866 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2867 match(VecZ); 2868 2869 format %{ %} 2870 interface(REG_INTER); 2871 %} 2872 2873 // Comparison Code for FP conditional move 2874 operand cmpOp_vcmppd() %{ 2875 match(Bool); 2876 2877 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2878 n->as_Bool()->_test._test != BoolTest::no_overflow); 2879 format %{ "" %} 2880 interface(COND_INTER) %{ 2881 equal (0x0, "eq"); 2882 less (0x1, "lt"); 2883 less_equal (0x2, "le"); 2884 not_equal (0xC, "ne"); 2885 greater_equal(0xD, "ge"); 2886 greater (0xE, "gt"); 2887 //TODO cannot compile (adlc breaks) without two next lines with error: 2888 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2889 // equal' for overflow. 
2890 overflow (0x20, "o"); // not really supported by the instruction 2891 no_overflow (0x21, "no"); // not really supported by the instruction 2892 %} 2893 %} 2894 2895 2896 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2897 2898 // ============================================================================ 2899 2900 instruct ShouldNotReachHere() %{ 2901 match(Halt); 2902 format %{ "stop\t# ShouldNotReachHere" %} 2903 ins_encode %{ 2904 if (is_reachable()) { 2905 __ stop(_halt_reason); 2906 } 2907 %} 2908 ins_pipe(pipe_slow); 2909 %} 2910 2911 // ============================================================================ 2912 2913 instruct addF_reg(regF dst, regF src) %{ 2914 predicate((UseSSE>=1) && (UseAVX == 0)); 2915 match(Set dst (AddF dst src)); 2916 2917 format %{ "addss $dst, $src" %} 2918 ins_cost(150); 2919 ins_encode %{ 2920 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2921 %} 2922 ins_pipe(pipe_slow); 2923 %} 2924 2925 instruct addF_mem(regF dst, memory src) %{ 2926 predicate((UseSSE>=1) && (UseAVX == 0)); 2927 match(Set dst (AddF dst (LoadF src))); 2928 2929 format %{ "addss $dst, $src" %} 2930 ins_cost(150); 2931 ins_encode %{ 2932 __ addss($dst$$XMMRegister, $src$$Address); 2933 %} 2934 ins_pipe(pipe_slow); 2935 %} 2936 2937 instruct addF_imm(regF dst, immF con) %{ 2938 predicate((UseSSE>=1) && (UseAVX == 0)); 2939 match(Set dst (AddF dst con)); 2940 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2941 ins_cost(150); 2942 ins_encode %{ 2943 __ addss($dst$$XMMRegister, $constantaddress($con)); 2944 %} 2945 ins_pipe(pipe_slow); 2946 %} 2947 2948 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2949 predicate(UseAVX > 0); 2950 match(Set dst (AddF src1 src2)); 2951 2952 format %{ "vaddss $dst, $src1, $src2" %} 2953 ins_cost(150); 2954 ins_encode %{ 2955 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2956 %} 2957 ins_pipe(pipe_slow); 2958 %} 2959 2960 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2961 predicate(UseAVX > 0); 2962 match(Set dst (AddF src1 (LoadF src2))); 2963 2964 format %{ "vaddss $dst, $src1, $src2" %} 2965 ins_cost(150); 2966 ins_encode %{ 2967 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2968 %} 2969 ins_pipe(pipe_slow); 2970 %} 2971 2972 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2973 predicate(UseAVX > 0); 2974 match(Set dst (AddF src con)); 2975 2976 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2977 ins_cost(150); 2978 ins_encode %{ 2979 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2980 %} 2981 ins_pipe(pipe_slow); 2982 %} 2983 2984 instruct addD_reg(regD dst, regD src) %{ 2985 predicate((UseSSE>=2) && (UseAVX == 0)); 2986 match(Set dst (AddD dst src)); 2987 2988 format %{ "addsd $dst, $src" %} 2989 ins_cost(150); 2990 ins_encode %{ 2991 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2992 %} 2993 ins_pipe(pipe_slow); 2994 %} 2995 2996 instruct addD_mem(regD dst, memory src) %{ 2997 predicate((UseSSE>=2) && (UseAVX == 0)); 2998 match(Set dst (AddD dst (LoadD src))); 2999 3000 format %{ "addsd $dst, $src" %} 3001 ins_cost(150); 3002 ins_encode %{ 3003 __ addsd($dst$$XMMRegister, $src$$Address); 3004 %} 3005 ins_pipe(pipe_slow); 3006 %} 3007 3008 instruct addD_imm(regD dst, immD con) %{ 3009 predicate((UseSSE>=2) && (UseAVX == 0)); 3010 match(Set dst (AddD dst con)); 3011 format %{ "addsd $dst, [$constantaddress]\t# load from constant 
table: double=$con" %} 3012 ins_cost(150); 3013 ins_encode %{ 3014 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3015 %} 3016 ins_pipe(pipe_slow); 3017 %} 3018 3019 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3020 predicate(UseAVX > 0); 3021 match(Set dst (AddD src1 src2)); 3022 3023 format %{ "vaddsd $dst, $src1, $src2" %} 3024 ins_cost(150); 3025 ins_encode %{ 3026 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3027 %} 3028 ins_pipe(pipe_slow); 3029 %} 3030 3031 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3032 predicate(UseAVX > 0); 3033 match(Set dst (AddD src1 (LoadD src2))); 3034 3035 format %{ "vaddsd $dst, $src1, $src2" %} 3036 ins_cost(150); 3037 ins_encode %{ 3038 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3039 %} 3040 ins_pipe(pipe_slow); 3041 %} 3042 3043 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3044 predicate(UseAVX > 0); 3045 match(Set dst (AddD src con)); 3046 3047 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct subF_reg(regF dst, regF src) %{ 3056 predicate((UseSSE>=1) && (UseAVX == 0)); 3057 match(Set dst (SubF dst src)); 3058 3059 format %{ "subss $dst, $src" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct subF_mem(regF dst, memory src) %{ 3068 predicate((UseSSE>=1) && (UseAVX == 0)); 3069 match(Set dst (SubF dst (LoadF src))); 3070 3071 format %{ "subss $dst, $src" %} 3072 ins_cost(150); 3073 ins_encode %{ 3074 __ subss($dst$$XMMRegister, $src$$Address); 3075 %} 3076 ins_pipe(pipe_slow); 3077 %} 3078 3079 instruct subF_imm(regF dst, immF con) %{ 3080 predicate((UseSSE>=1) && (UseAVX == 0)); 3081 match(Set dst (SubF dst con)); 3082 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3083 ins_cost(150); 3084 ins_encode %{ 3085 __ subss($dst$$XMMRegister, $constantaddress($con)); 3086 %} 3087 ins_pipe(pipe_slow); 3088 %} 3089 3090 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3091 predicate(UseAVX > 0); 3092 match(Set dst (SubF src1 src2)); 3093 3094 format %{ "vsubss $dst, $src1, $src2" %} 3095 ins_cost(150); 3096 ins_encode %{ 3097 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3098 %} 3099 ins_pipe(pipe_slow); 3100 %} 3101 3102 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3103 predicate(UseAVX > 0); 3104 match(Set dst (SubF src1 (LoadF src2))); 3105 3106 format %{ "vsubss $dst, $src1, $src2" %} 3107 ins_cost(150); 3108 ins_encode %{ 3109 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3110 %} 3111 ins_pipe(pipe_slow); 3112 %} 3113 3114 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3115 predicate(UseAVX > 0); 3116 match(Set dst (SubF src con)); 3117 3118 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct subD_reg(regD dst, regD src) %{ 3127 predicate((UseSSE>=2) && (UseAVX == 0)); 3128 match(Set dst (SubD dst src)); 3129 3130 format %{ "subsd $dst, $src" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ subsd($dst$$XMMRegister, 
$src$$XMMRegister); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct subD_mem(regD dst, memory src) %{ 3139 predicate((UseSSE>=2) && (UseAVX == 0)); 3140 match(Set dst (SubD dst (LoadD src))); 3141 3142 format %{ "subsd $dst, $src" %} 3143 ins_cost(150); 3144 ins_encode %{ 3145 __ subsd($dst$$XMMRegister, $src$$Address); 3146 %} 3147 ins_pipe(pipe_slow); 3148 %} 3149 3150 instruct subD_imm(regD dst, immD con) %{ 3151 predicate((UseSSE>=2) && (UseAVX == 0)); 3152 match(Set dst (SubD dst con)); 3153 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3154 ins_cost(150); 3155 ins_encode %{ 3156 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3157 %} 3158 ins_pipe(pipe_slow); 3159 %} 3160 3161 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3162 predicate(UseAVX > 0); 3163 match(Set dst (SubD src1 src2)); 3164 3165 format %{ "vsubsd $dst, $src1, $src2" %} 3166 ins_cost(150); 3167 ins_encode %{ 3168 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3169 %} 3170 ins_pipe(pipe_slow); 3171 %} 3172 3173 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3174 predicate(UseAVX > 0); 3175 match(Set dst (SubD src1 (LoadD src2))); 3176 3177 format %{ "vsubsd $dst, $src1, $src2" %} 3178 ins_cost(150); 3179 ins_encode %{ 3180 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3181 %} 3182 ins_pipe(pipe_slow); 3183 %} 3184 3185 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3186 predicate(UseAVX > 0); 3187 match(Set dst (SubD src con)); 3188 3189 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3190 ins_cost(150); 3191 ins_encode %{ 3192 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3193 %} 3194 ins_pipe(pipe_slow); 3195 %} 3196 3197 instruct mulF_reg(regF dst, regF src) %{ 3198 predicate((UseSSE>=1) && (UseAVX == 0)); 3199 match(Set dst (MulF dst src)); 3200 3201 format %{ "mulss $dst, $src" %} 3202 ins_cost(150); 3203 ins_encode %{ 3204 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct mulF_mem(regF dst, memory src) %{ 3210 predicate((UseSSE>=1) && (UseAVX == 0)); 3211 match(Set dst (MulF dst (LoadF src))); 3212 3213 format %{ "mulss $dst, $src" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ mulss($dst$$XMMRegister, $src$$Address); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct mulF_imm(regF dst, immF con) %{ 3222 predicate((UseSSE>=1) && (UseAVX == 0)); 3223 match(Set dst (MulF dst con)); 3224 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3225 ins_cost(150); 3226 ins_encode %{ 3227 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3228 %} 3229 ins_pipe(pipe_slow); 3230 %} 3231 3232 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3233 predicate(UseAVX > 0); 3234 match(Set dst (MulF src1 src2)); 3235 3236 format %{ "vmulss $dst, $src1, $src2" %} 3237 ins_cost(150); 3238 ins_encode %{ 3239 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3240 %} 3241 ins_pipe(pipe_slow); 3242 %} 3243 3244 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3245 predicate(UseAVX > 0); 3246 match(Set dst (MulF src1 (LoadF src2))); 3247 3248 format %{ "vmulss $dst, $src1, $src2" %} 3249 ins_cost(150); 3250 ins_encode %{ 3251 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3252 %} 3253 ins_pipe(pipe_slow); 3254 %} 3255 3256 instruct mulF_reg_imm(regF dst, regF src, immF con) 
%{ 3257 predicate(UseAVX > 0); 3258 match(Set dst (MulF src con)); 3259 3260 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3261 ins_cost(150); 3262 ins_encode %{ 3263 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3264 %} 3265 ins_pipe(pipe_slow); 3266 %} 3267 3268 instruct mulD_reg(regD dst, regD src) %{ 3269 predicate((UseSSE>=2) && (UseAVX == 0)); 3270 match(Set dst (MulD dst src)); 3271 3272 format %{ "mulsd $dst, $src" %} 3273 ins_cost(150); 3274 ins_encode %{ 3275 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3276 %} 3277 ins_pipe(pipe_slow); 3278 %} 3279 3280 instruct mulD_mem(regD dst, memory src) %{ 3281 predicate((UseSSE>=2) && (UseAVX == 0)); 3282 match(Set dst (MulD dst (LoadD src))); 3283 3284 format %{ "mulsd $dst, $src" %} 3285 ins_cost(150); 3286 ins_encode %{ 3287 __ mulsd($dst$$XMMRegister, $src$$Address); 3288 %} 3289 ins_pipe(pipe_slow); 3290 %} 3291 3292 instruct mulD_imm(regD dst, immD con) %{ 3293 predicate((UseSSE>=2) && (UseAVX == 0)); 3294 match(Set dst (MulD dst con)); 3295 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3296 ins_cost(150); 3297 ins_encode %{ 3298 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3299 %} 3300 ins_pipe(pipe_slow); 3301 %} 3302 3303 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3304 predicate(UseAVX > 0); 3305 match(Set dst (MulD src1 src2)); 3306 3307 format %{ "vmulsd $dst, $src1, $src2" %} 3308 ins_cost(150); 3309 ins_encode %{ 3310 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3311 %} 3312 ins_pipe(pipe_slow); 3313 %} 3314 3315 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3316 predicate(UseAVX > 0); 3317 match(Set dst (MulD src1 (LoadD src2))); 3318 3319 format %{ "vmulsd $dst, $src1, $src2" %} 3320 ins_cost(150); 3321 ins_encode %{ 3322 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3323 %} 3324 ins_pipe(pipe_slow); 3325 %} 3326 3327 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3328 predicate(UseAVX > 0); 3329 match(Set dst (MulD src con)); 3330 3331 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3332 ins_cost(150); 3333 ins_encode %{ 3334 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3335 %} 3336 ins_pipe(pipe_slow); 3337 %} 3338 3339 instruct divF_reg(regF dst, regF src) %{ 3340 predicate((UseSSE>=1) && (UseAVX == 0)); 3341 match(Set dst (DivF dst src)); 3342 3343 format %{ "divss $dst, $src" %} 3344 ins_cost(150); 3345 ins_encode %{ 3346 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3347 %} 3348 ins_pipe(pipe_slow); 3349 %} 3350 3351 instruct divF_mem(regF dst, memory src) %{ 3352 predicate((UseSSE>=1) && (UseAVX == 0)); 3353 match(Set dst (DivF dst (LoadF src))); 3354 3355 format %{ "divss $dst, $src" %} 3356 ins_cost(150); 3357 ins_encode %{ 3358 __ divss($dst$$XMMRegister, $src$$Address); 3359 %} 3360 ins_pipe(pipe_slow); 3361 %} 3362 3363 instruct divF_imm(regF dst, immF con) %{ 3364 predicate((UseSSE>=1) && (UseAVX == 0)); 3365 match(Set dst (DivF dst con)); 3366 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3367 ins_cost(150); 3368 ins_encode %{ 3369 __ divss($dst$$XMMRegister, $constantaddress($con)); 3370 %} 3371 ins_pipe(pipe_slow); 3372 %} 3373 3374 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3375 predicate(UseAVX > 0); 3376 match(Set dst (DivF src1 src2)); 3377 3378 format %{ "vdivss $dst, 
$src1, $src2" %} 3379 ins_cost(150); 3380 ins_encode %{ 3381 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3382 %} 3383 ins_pipe(pipe_slow); 3384 %} 3385 3386 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3387 predicate(UseAVX > 0); 3388 match(Set dst (DivF src1 (LoadF src2))); 3389 3390 format %{ "vdivss $dst, $src1, $src2" %} 3391 ins_cost(150); 3392 ins_encode %{ 3393 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3394 %} 3395 ins_pipe(pipe_slow); 3396 %} 3397 3398 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3399 predicate(UseAVX > 0); 3400 match(Set dst (DivF src con)); 3401 3402 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3403 ins_cost(150); 3404 ins_encode %{ 3405 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3406 %} 3407 ins_pipe(pipe_slow); 3408 %} 3409 3410 instruct divD_reg(regD dst, regD src) %{ 3411 predicate((UseSSE>=2) && (UseAVX == 0)); 3412 match(Set dst (DivD dst src)); 3413 3414 format %{ "divsd $dst, $src" %} 3415 ins_cost(150); 3416 ins_encode %{ 3417 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3418 %} 3419 ins_pipe(pipe_slow); 3420 %} 3421 3422 instruct divD_mem(regD dst, memory src) %{ 3423 predicate((UseSSE>=2) && (UseAVX == 0)); 3424 match(Set dst (DivD dst (LoadD src))); 3425 3426 format %{ "divsd $dst, $src" %} 3427 ins_cost(150); 3428 ins_encode %{ 3429 __ divsd($dst$$XMMRegister, $src$$Address); 3430 %} 3431 ins_pipe(pipe_slow); 3432 %} 3433 3434 instruct divD_imm(regD dst, immD con) %{ 3435 predicate((UseSSE>=2) && (UseAVX == 0)); 3436 match(Set dst (DivD dst con)); 3437 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3438 ins_cost(150); 3439 ins_encode %{ 3440 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3441 %} 3442 ins_pipe(pipe_slow); 3443 %} 3444 3445 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3446 predicate(UseAVX > 0); 3447 match(Set dst (DivD src1 src2)); 3448 3449 format %{ "vdivsd $dst, $src1, $src2" %} 3450 ins_cost(150); 3451 ins_encode %{ 3452 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3453 %} 3454 ins_pipe(pipe_slow); 3455 %} 3456 3457 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3458 predicate(UseAVX > 0); 3459 match(Set dst (DivD src1 (LoadD src2))); 3460 3461 format %{ "vdivsd $dst, $src1, $src2" %} 3462 ins_cost(150); 3463 ins_encode %{ 3464 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3465 %} 3466 ins_pipe(pipe_slow); 3467 %} 3468 3469 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3470 predicate(UseAVX > 0); 3471 match(Set dst (DivD src con)); 3472 3473 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3474 ins_cost(150); 3475 ins_encode %{ 3476 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3477 %} 3478 ins_pipe(pipe_slow); 3479 %} 3480 3481 instruct absF_reg(regF dst) %{ 3482 predicate((UseSSE>=1) && (UseAVX == 0)); 3483 match(Set dst (AbsF dst)); 3484 ins_cost(150); 3485 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3486 ins_encode %{ 3487 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3488 %} 3489 ins_pipe(pipe_slow); 3490 %} 3491 3492 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3493 predicate(UseAVX > 0); 3494 match(Set dst (AbsF src)); 3495 ins_cost(150); 3496 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3497 ins_encode 
%{ 3498 int vlen_enc = Assembler::AVX_128bit; 3499 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3500 ExternalAddress(float_signmask()), vlen_enc); 3501 %} 3502 ins_pipe(pipe_slow); 3503 %} 3504 3505 instruct absD_reg(regD dst) %{ 3506 predicate((UseSSE>=2) && (UseAVX == 0)); 3507 match(Set dst (AbsD dst)); 3508 ins_cost(150); 3509 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3510 "# abs double by sign masking" %} 3511 ins_encode %{ 3512 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3513 %} 3514 ins_pipe(pipe_slow); 3515 %} 3516 3517 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3518 predicate(UseAVX > 0); 3519 match(Set dst (AbsD src)); 3520 ins_cost(150); 3521 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3522 "# abs double by sign masking" %} 3523 ins_encode %{ 3524 int vlen_enc = Assembler::AVX_128bit; 3525 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3526 ExternalAddress(double_signmask()), vlen_enc); 3527 %} 3528 ins_pipe(pipe_slow); 3529 %} 3530 3531 instruct negF_reg(regF dst) %{ 3532 predicate((UseSSE>=1) && (UseAVX == 0)); 3533 match(Set dst (NegF dst)); 3534 ins_cost(150); 3535 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3536 ins_encode %{ 3537 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3538 %} 3539 ins_pipe(pipe_slow); 3540 %} 3541 3542 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3543 predicate(UseAVX > 0); 3544 match(Set dst (NegF src)); 3545 ins_cost(150); 3546 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3547 ins_encode %{ 3548 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3549 ExternalAddress(float_signflip())); 3550 %} 3551 ins_pipe(pipe_slow); 3552 %} 3553 3554 instruct negD_reg(regD dst) %{ 3555 predicate((UseSSE>=2) && (UseAVX == 0)); 3556 match(Set dst (NegD dst)); 3557 ins_cost(150); 3558 format %{ "xorpd $dst, [0x8000000000000000]\t" 3559 "# neg double by sign flipping" %} 3560 ins_encode %{ 3561 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3562 %} 3563 ins_pipe(pipe_slow); 3564 %} 3565 3566 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3567 predicate(UseAVX > 0); 3568 match(Set dst (NegD src)); 3569 ins_cost(150); 3570 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3571 "# neg double by sign flipping" %} 3572 ins_encode %{ 3573 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3574 ExternalAddress(double_signflip())); 3575 %} 3576 ins_pipe(pipe_slow); 3577 %} 3578 3579 // sqrtss instruction needs destination register to be pre initialized for best performance 3580 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3581 instruct sqrtF_reg(regF dst) %{ 3582 predicate(UseSSE>=1); 3583 match(Set dst (SqrtF dst)); 3584 format %{ "sqrtss $dst, $dst" %} 3585 ins_encode %{ 3586 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3587 %} 3588 ins_pipe(pipe_slow); 3589 %} 3590 3591 // sqrtsd instruction needs destination register to be pre initialized for best performance 3592 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3593 instruct sqrtD_reg(regD dst) %{ 3594 predicate(UseSSE>=2); 3595 match(Set dst (SqrtD dst)); 3596 format %{ "sqrtsd $dst, $dst" %} 3597 ins_encode %{ 3598 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3599 %} 3600 ins_pipe(pipe_slow); 3601 %} 3602 3603 3604 // ---------------------------------------- VectorReinterpret ------------------------------------ 3605 instruct reinterpret_mask(kReg 
dst) %{ 3606 predicate(n->bottom_type()->isa_vectmask() && 3607 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3608 match(Set dst (VectorReinterpret dst)); 3609 ins_cost(125); 3610 format %{ "vector_reinterpret $dst\t!" %} 3611 ins_encode %{ 3612 // empty 3613 %} 3614 ins_pipe( pipe_slow ); 3615 %} 3616 3617 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3618 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3619 n->bottom_type()->isa_vectmask() && 3620 n->in(1)->bottom_type()->isa_vectmask() && 3621 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3622 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3623 match(Set dst (VectorReinterpret src)); 3624 effect(TEMP xtmp); 3625 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3626 ins_encode %{ 3627 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3628 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3629 assert(src_sz == dst_sz , "src and dst size mismatch"); 3630 int vlen_enc = vector_length_encoding(src_sz); 3631 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3632 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3633 %} 3634 ins_pipe( pipe_slow ); 3635 %} 3636 3637 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3638 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3639 n->bottom_type()->isa_vectmask() && 3640 n->in(1)->bottom_type()->isa_vectmask() && 3641 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3642 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3643 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3644 match(Set dst (VectorReinterpret src)); 3645 effect(TEMP xtmp); 3646 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3647 ins_encode %{ 3648 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3649 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3650 assert(src_sz == dst_sz , "src and dst size mismatch"); 3651 int vlen_enc = vector_length_encoding(src_sz); 3652 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3653 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3654 %} 3655 ins_pipe( pipe_slow ); 3656 %} 3657 3658 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3659 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3660 n->bottom_type()->isa_vectmask() && 3661 n->in(1)->bottom_type()->isa_vectmask() && 3662 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3663 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3664 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3665 match(Set dst (VectorReinterpret src)); 3666 effect(TEMP xtmp); 3667 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3668 ins_encode %{ 3669 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3670 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3671 assert(src_sz == dst_sz , "src and dst size mismatch"); 3672 int vlen_enc = vector_length_encoding(src_sz); 3673 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3674 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3675 %} 3676 ins_pipe( pipe_slow ); 3677 %} 3678 3679 instruct reinterpret(vec dst) %{ 3680 predicate(!n->bottom_type()->isa_vectmask() && 3681 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3682 match(Set dst (VectorReinterpret dst)); 3683 ins_cost(125); 3684 format %{ "vector_reinterpret $dst\t!" %} 3685 ins_encode %{ 3686 // empty 3687 %} 3688 ins_pipe( pipe_slow ); 3689 %} 3690 3691 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ 3692 predicate(UseAVX == 0 && 3693 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3694 match(Set dst (VectorReinterpret src)); 3695 ins_cost(125); 3696 effect(TEMP dst, TEMP scratch); 3697 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3698 ins_encode %{ 3699 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3700 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3701 3702 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3703 if (src_vlen_in_bytes == 4) { 3704 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 3705 } else { 3706 assert(src_vlen_in_bytes == 8, ""); 3707 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3708 } 3709 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3710 %} 3711 ins_pipe( pipe_slow ); 3712 %} 3713 3714 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ 3715 predicate(UseAVX > 0 && 3716 !n->bottom_type()->isa_vectmask() && 3717 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3718 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3719 match(Set dst (VectorReinterpret src)); 3720 ins_cost(125); 3721 effect(TEMP scratch); 3722 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3723 ins_encode %{ 3724 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); 3725 %} 3726 ins_pipe( pipe_slow ); 3727 %} 3728 3729 3730 instruct vreinterpret_expand(legVec dst, vec src) %{ 3731 predicate(UseAVX > 0 && 3732 !n->bottom_type()->isa_vectmask() && 3733 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3734 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3735 match(Set dst (VectorReinterpret src)); 3736 ins_cost(125); 3737 format %{ "vector_reinterpret_expand $dst,$src\t!" 
%} 3738 ins_encode %{ 3739 switch (Matcher::vector_length_in_bytes(this, $src)) { 3740 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3741 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3742 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3743 default: ShouldNotReachHere(); 3744 } 3745 %} 3746 ins_pipe( pipe_slow ); 3747 %} 3748 3749 instruct reinterpret_shrink(vec dst, legVec src) %{ 3750 predicate(!n->bottom_type()->isa_vectmask() && 3751 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3752 match(Set dst (VectorReinterpret src)); 3753 ins_cost(125); 3754 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3755 ins_encode %{ 3756 switch (Matcher::vector_length_in_bytes(this)) { 3757 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3758 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3759 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3760 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3761 default: ShouldNotReachHere(); 3762 } 3763 %} 3764 ins_pipe( pipe_slow ); 3765 %} 3766 3767 // ---------------------------------------------------------------------------------------------------- 3768 3769 #ifdef _LP64 3770 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3771 match(Set dst (RoundDoubleMode src rmode)); 3772 format %{ "roundsd $dst,$src" %} 3773 ins_cost(150); 3774 ins_encode %{ 3775 assert(UseSSE >= 4, "required"); 3776 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3777 %} 3778 ins_pipe(pipe_slow); 3779 %} 3780 3781 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3782 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3783 format %{ "roundsd $dst,$src" %} 3784 ins_cost(150); 3785 ins_encode %{ 3786 assert(UseSSE >= 4, "required"); 3787 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3788 %} 3789 ins_pipe(pipe_slow); 3790 %} 3791 3792 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3793 match(Set dst (RoundDoubleMode con rmode)); 3794 effect(TEMP scratch_reg); 3795 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3796 ins_cost(150); 3797 ins_encode %{ 3798 assert(UseSSE >= 4, "required"); 3799 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3800 %} 3801 ins_pipe(pipe_slow); 3802 %} 3803 3804 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3805 predicate(Matcher::vector_length(n) < 8); 3806 match(Set dst (RoundDoubleModeV src rmode)); 3807 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3808 ins_encode %{ 3809 assert(UseAVX > 0, "required"); 3810 int vlen_enc = vector_length_encoding(this); 3811 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3812 %} 3813 ins_pipe( pipe_slow ); 3814 %} 3815 3816 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3817 predicate(Matcher::vector_length(n) == 8); 3818 match(Set dst (RoundDoubleModeV src rmode)); 3819 format %{ "vrndscalepd $dst,$src,$rmode\t! 
round packed8D" %} 3820 ins_encode %{ 3821 assert(UseAVX > 2, "required"); 3822 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3823 %} 3824 ins_pipe( pipe_slow ); 3825 %} 3826 3827 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3828 predicate(Matcher::vector_length(n) < 8); 3829 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3830 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3831 ins_encode %{ 3832 assert(UseAVX > 0, "required"); 3833 int vlen_enc = vector_length_encoding(this); 3834 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3835 %} 3836 ins_pipe( pipe_slow ); 3837 %} 3838 3839 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3840 predicate(Matcher::vector_length(n) == 8); 3841 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3842 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3843 ins_encode %{ 3844 assert(UseAVX > 2, "required"); 3845 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3846 %} 3847 ins_pipe( pipe_slow ); 3848 %} 3849 #endif // _LP64 3850 3851 instruct onspinwait() %{ 3852 match(OnSpinWait); 3853 ins_cost(200); 3854 3855 format %{ 3856 $$template 3857 $$emit$$"pause\t! membar_onspinwait" 3858 %} 3859 ins_encode %{ 3860 __ pause(); 3861 %} 3862 ins_pipe(pipe_slow); 3863 %} 3864 3865 // a * b + c 3866 instruct fmaD_reg(regD a, regD b, regD c) %{ 3867 predicate(UseFMA); 3868 match(Set c (FmaD c (Binary a b))); 3869 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3870 ins_cost(150); 3871 ins_encode %{ 3872 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3873 %} 3874 ins_pipe( pipe_slow ); 3875 %} 3876 3877 // a * b + c 3878 instruct fmaF_reg(regF a, regF b, regF c) %{ 3879 predicate(UseFMA); 3880 match(Set c (FmaF c (Binary a b))); 3881 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3882 ins_cost(150); 3883 ins_encode %{ 3884 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3885 %} 3886 ins_pipe( pipe_slow ); 3887 %} 3888 3889 // ====================VECTOR INSTRUCTIONS===================================== 3890 3891 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3892 instruct MoveVec2Leg(legVec dst, vec src) %{ 3893 match(Set dst src); 3894 format %{ "" %} 3895 ins_encode %{ 3896 ShouldNotReachHere(); 3897 %} 3898 ins_pipe( fpu_reg_reg ); 3899 %} 3900 3901 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3902 match(Set dst src); 3903 format %{ "" %} 3904 ins_encode %{ 3905 ShouldNotReachHere(); 3906 %} 3907 ins_pipe( fpu_reg_reg ); 3908 %} 3909 3910 // ============================================================================ 3911 3912 // Load vectors generic operand pattern 3913 instruct loadV(vec dst, memory mem) %{ 3914 match(Set dst (LoadVector mem)); 3915 ins_cost(125); 3916 format %{ "load_vector $dst,$mem" %} 3917 ins_encode %{ 3918 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3919 %} 3920 ins_pipe( pipe_slow ); 3921 %} 3922 3923 // Store vectors generic operand pattern. 
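// The generic load/store rules pick the actual move instruction from the vector size
// in bytes rather than from the operand flavor: 4- and 8-byte vectors use movdl/movq,
// 16- and 32-byte vectors use (v)movdqu, and 64-byte vectors use the EVEX-encoded
// evmovdqul, as the switch in storeV below shows. For example, storing a 256-bit
// vector of eight ints emits a single vmovdqu to $mem. load_vector() used by loadV
// above is assumed to apply the same size-based selection on the load side.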
3924 instruct storeV(memory mem, vec src) %{ 3925 match(Set mem (StoreVector mem src)); 3926 ins_cost(145); 3927 format %{ "store_vector $mem,$src\n\t" %} 3928 ins_encode %{ 3929 switch (Matcher::vector_length_in_bytes(this, $src)) { 3930 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3931 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3932 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3933 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3934 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3935 default: ShouldNotReachHere(); 3936 } 3937 %} 3938 ins_pipe( pipe_slow ); 3939 %} 3940 3941 // ---------------------------------------- Gather ------------------------------------ 3942 3943 // Gather INT, LONG, FLOAT, DOUBLE 3944 3945 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3946 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 3947 match(Set dst (LoadVectorGather mem idx)); 3948 effect(TEMP dst, TEMP tmp, TEMP mask); 3949 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 3950 ins_encode %{ 3951 assert(UseAVX >= 2, "sanity"); 3952 3953 int vlen_enc = vector_length_encoding(this); 3954 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3955 3956 assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity"); 3957 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3958 3959 if (vlen_enc == Assembler::AVX_128bit) { 3960 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3961 } else { 3962 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3963 } 3964 __ lea($tmp$$Register, $mem$$Address); 3965 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3966 %} 3967 ins_pipe( pipe_slow ); 3968 %} 3969 3970 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 3971 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 3972 match(Set dst (LoadVectorGather mem idx)); 3973 effect(TEMP dst, TEMP tmp, TEMP ktmp); 3974 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 3975 ins_encode %{ 3976 assert(UseAVX > 2, "sanity"); 3977 3978 int vlen_enc = vector_length_encoding(this); 3979 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3980 3981 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3982 3983 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3984 __ lea($tmp$$Register, $mem$$Address); 3985 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3986 %} 3987 ins_pipe( pipe_slow ); 3988 %} 3989 3990 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 3991 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 3992 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 3993 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! 
using $tmp and ktmp as TEMP" %} 3994 ins_encode %{ 3995 assert(UseAVX > 2, "sanity"); 3996 int vlen_enc = vector_length_encoding(this); 3997 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3998 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3999 // Note: Since the gather instruction partially updates the opmask register used 4000 // for predication, the mask operand is moved to a temporary register. 4001 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4002 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4003 __ lea($tmp$$Register, $mem$$Address); 4004 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 4005 %} 4006 ins_pipe( pipe_slow ); 4007 %} 4008 // ====================Scatter======================================= 4009 4010 // Scatter INT, LONG, FLOAT, DOUBLE 4011 4012 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 4013 predicate(UseAVX > 2); 4014 match(Set mem (StoreVectorScatter mem (Binary src idx))); 4015 effect(TEMP tmp, TEMP ktmp); 4016 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 4017 ins_encode %{ 4018 int vlen_enc = vector_length_encoding(this, $src); 4019 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4020 4021 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4022 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4023 4024 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 4025 __ lea($tmp$$Register, $mem$$Address); 4026 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4027 %} 4028 ins_pipe( pipe_slow ); 4029 %} 4030 4031 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4032 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4033 effect(TEMP tmp, TEMP ktmp); 4034 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4035 ins_encode %{ 4036 int vlen_enc = vector_length_encoding(this, $src); 4037 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4038 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4039 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4040 // Note: Since the scatter instruction partially updates the opmask register used 4041 // for predication, the mask operand is moved to a temporary register.
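// The sequence below mirrors the masked gather above: the predicate is first copied
// into $ktmp because AVX-512 gather/scatter instructions clear opmask bits as each
// lane completes, then the base address is materialized with lea, and finally the
// element-typed evscatter is issued. Illustrative example (not from this file): a
// masked scatter of eight ints under mask 0b00001111 stores only lanes 0-3 and leaves
// the remaining addresses untouched.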
4042 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4043 __ lea($tmp$$Register, $mem$$Address); 4044 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4045 %} 4046 ins_pipe( pipe_slow ); 4047 %} 4048 4049 // ====================REPLICATE======================================= 4050 4051 // Replicate byte scalar to be vector 4052 instruct ReplB_reg(vec dst, rRegI src) %{ 4053 match(Set dst (ReplicateB src)); 4054 format %{ "replicateB $dst,$src" %} 4055 ins_encode %{ 4056 uint vlen = Matcher::vector_length(this); 4057 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4058 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4059 int vlen_enc = vector_length_encoding(this); 4060 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4061 } else if (VM_Version::supports_avx2()) { 4062 int vlen_enc = vector_length_encoding(this); 4063 __ movdl($dst$$XMMRegister, $src$$Register); 4064 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4065 } else { 4066 __ movdl($dst$$XMMRegister, $src$$Register); 4067 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4068 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4069 if (vlen >= 16) { 4070 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4071 if (vlen >= 32) { 4072 assert(vlen == 32, "sanity"); 4073 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4074 } 4075 } 4076 } 4077 %} 4078 ins_pipe( pipe_slow ); 4079 %} 4080 4081 instruct ReplB_mem(vec dst, memory mem) %{ 4082 predicate(VM_Version::supports_avx2()); 4083 match(Set dst (ReplicateB (LoadB mem))); 4084 format %{ "replicateB $dst,$mem" %} 4085 ins_encode %{ 4086 int vlen_enc = vector_length_encoding(this); 4087 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 instruct ReplB_imm(vec dst, immI con) %{ 4093 match(Set dst (ReplicateB con)); 4094 format %{ "replicateB $dst,$con" %} 4095 ins_encode %{ 4096 InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this))); 4097 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4098 %} 4099 ins_pipe( pipe_slow ); 4100 %} 4101 4102 // ====================ReplicateS======================================= 4103 4104 instruct ReplS_reg(vec dst, rRegI src) %{ 4105 match(Set dst (ReplicateS src)); 4106 format %{ "replicateS $dst,$src" %} 4107 ins_encode %{ 4108 uint vlen = Matcher::vector_length(this); 4109 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4110 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4111 int vlen_enc = vector_length_encoding(this); 4112 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4113 } else if (VM_Version::supports_avx2()) { 4114 int vlen_enc = vector_length_encoding(this); 4115 __ movdl($dst$$XMMRegister, $src$$Register); 4116 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4117 } else { 4118 __ movdl($dst$$XMMRegister, $src$$Register); 4119 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4120 if (vlen >= 8) { 4121 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4122 if (vlen >= 16) { 4123 assert(vlen == 16, "sanity"); 4124 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4125 } 4126 } 4127 } 4128 %} 4129 ins_pipe( pipe_slow ); 4130 %} 4131 4132 instruct ReplS_mem(vec dst, 
memory mem) %{ 4133 predicate(VM_Version::supports_avx2()); 4134 match(Set dst (ReplicateS (LoadS mem))); 4135 format %{ "replicateS $dst,$mem" %} 4136 ins_encode %{ 4137 int vlen_enc = vector_length_encoding(this); 4138 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4139 %} 4140 ins_pipe( pipe_slow ); 4141 %} 4142 4143 instruct ReplS_imm(vec dst, immI con) %{ 4144 match(Set dst (ReplicateS con)); 4145 format %{ "replicateS $dst,$con" %} 4146 ins_encode %{ 4147 InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this))); 4148 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4149 %} 4150 ins_pipe( pipe_slow ); 4151 %} 4152 4153 // ====================ReplicateI======================================= 4154 4155 instruct ReplI_reg(vec dst, rRegI src) %{ 4156 match(Set dst (ReplicateI src)); 4157 format %{ "replicateI $dst,$src" %} 4158 ins_encode %{ 4159 uint vlen = Matcher::vector_length(this); 4160 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4161 int vlen_enc = vector_length_encoding(this); 4162 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4163 } else if (VM_Version::supports_avx2()) { 4164 int vlen_enc = vector_length_encoding(this); 4165 __ movdl($dst$$XMMRegister, $src$$Register); 4166 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4167 } else { 4168 __ movdl($dst$$XMMRegister, $src$$Register); 4169 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4170 if (vlen >= 8) { 4171 assert(vlen == 8, "sanity"); 4172 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4173 } 4174 } 4175 %} 4176 ins_pipe( pipe_slow ); 4177 %} 4178 4179 instruct ReplI_mem(vec dst, memory mem) %{ 4180 match(Set dst (ReplicateI (LoadI mem))); 4181 format %{ "replicateI $dst,$mem" %} 4182 ins_encode %{ 4183 uint vlen = Matcher::vector_length(this); 4184 if (vlen <= 4) { 4185 __ movdl($dst$$XMMRegister, $mem$$Address); 4186 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4187 } else { 4188 assert(VM_Version::supports_avx2(), "sanity"); 4189 int vlen_enc = vector_length_encoding(this); 4190 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4191 } 4192 %} 4193 ins_pipe( pipe_slow ); 4194 %} 4195 4196 instruct ReplI_imm(vec dst, immI con) %{ 4197 match(Set dst (ReplicateI con)); 4198 format %{ "replicateI $dst,$con" %} 4199 ins_encode %{ 4200 InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this))); 4201 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4202 %} 4203 ins_pipe( pipe_slow ); 4204 %} 4205 4206 // Replicate scalar zero to be vector 4207 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4208 match(Set dst (ReplicateB zero)); 4209 match(Set dst (ReplicateS zero)); 4210 match(Set dst (ReplicateI zero)); 4211 format %{ "replicateI $dst,$zero" %} 4212 ins_encode %{ 4213 uint vsize = Matcher::vector_length_in_bytes(this); 4214 if (vsize <= 16) { 4215 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4216 } else { 4217 int vlen_enc = vector_length_encoding(this); 4218 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4219 } 4220 %} 4221 ins_pipe( fpu_reg_reg ); 4222 %} 4223 4224 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4225 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16); 4226 match(Set dst (ReplicateB con)); 4227 match(Set dst (ReplicateS con)); 4228 match(Set dst (ReplicateI con)); 
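// All-ones (-1 in every lane) is materialized directly in the destination register by
// vallones() below, so no constant-table load is needed; a typical expansion is the
// vpcmpeqd dst,dst,dst idiom (or an EVEX ternary-logic all-ones) -- stated here as an
// assumption about the macro assembler, not something spelled out in this file.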
4229 effect(TEMP dst); 4230 format %{ "vallones $dst" %} 4231 ins_encode %{ 4232 int vector_len = vector_length_encoding(this); 4233 __ vallones($dst$$XMMRegister, vector_len); 4234 %} 4235 ins_pipe( pipe_slow ); 4236 %} 4237 4238 // ====================ReplicateL======================================= 4239 4240 #ifdef _LP64 4241 // Replicate long (8 byte) scalar to be vector 4242 instruct ReplL_reg(vec dst, rRegL src) %{ 4243 match(Set dst (ReplicateL src)); 4244 format %{ "replicateL $dst,$src" %} 4245 ins_encode %{ 4246 uint vlen = Matcher::vector_length(this); 4247 if (vlen == 2) { 4248 __ movdq($dst$$XMMRegister, $src$$Register); 4249 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4250 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4251 int vlen_enc = vector_length_encoding(this); 4252 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4253 } else if (VM_Version::supports_avx2()) { 4254 assert(vlen == 4, "sanity"); 4255 int vlen_enc = vector_length_encoding(this); 4256 __ movdq($dst$$XMMRegister, $src$$Register); 4257 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4258 } else { 4259 assert(vlen == 4, "sanity"); 4260 __ movdq($dst$$XMMRegister, $src$$Register); 4261 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4262 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4263 } 4264 %} 4265 ins_pipe( pipe_slow ); 4266 %} 4267 #else // _LP64 4268 // Replicate long (8 byte) scalar to be vector 4269 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4270 predicate(Matcher::vector_length(n) <= 4); 4271 match(Set dst (ReplicateL src)); 4272 effect(TEMP dst, USE src, TEMP tmp); 4273 format %{ "replicateL $dst,$src" %} 4274 ins_encode %{ 4275 uint vlen = Matcher::vector_length(this); 4276 if (vlen == 2) { 4277 __ movdl($dst$$XMMRegister, $src$$Register); 4278 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4279 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4280 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4281 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4282 int vlen_enc = Assembler::AVX_256bit; 4283 __ movdl($dst$$XMMRegister, $src$$Register); 4284 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4285 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4286 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4287 } else { 4288 __ movdl($dst$$XMMRegister, $src$$Register); 4289 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4290 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4291 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4292 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4293 } 4294 %} 4295 ins_pipe( pipe_slow ); 4296 %} 4297 4298 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4299 predicate(Matcher::vector_length(n) == 8); 4300 match(Set dst (ReplicateL src)); 4301 effect(TEMP dst, USE src, TEMP tmp); 4302 format %{ "replicateL $dst,$src" %} 4303 ins_encode %{ 4304 if (VM_Version::supports_avx512vl()) { 4305 __ movdl($dst$$XMMRegister, $src$$Register); 4306 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4307 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4308 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4309 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4310 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4311 } else { 4312 int vlen_enc = Assembler::AVX_512bit; 4313 __ movdl($dst$$XMMRegister, 
$src$$Register); 4314 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4315 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4316 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4317 } 4318 %} 4319 ins_pipe( pipe_slow ); 4320 %} 4321 #endif // _LP64 4322 4323 instruct ReplL_mem(vec dst, memory mem) %{ 4324 match(Set dst (ReplicateL (LoadL mem))); 4325 format %{ "replicateL $dst,$mem" %} 4326 ins_encode %{ 4327 uint vlen = Matcher::vector_length(this); 4328 if (vlen == 2) { 4329 __ movq($dst$$XMMRegister, $mem$$Address); 4330 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4331 } else { 4332 assert(VM_Version::supports_avx2(), "sanity"); 4333 int vlen_enc = vector_length_encoding(this); 4334 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4335 } 4336 %} 4337 ins_pipe( pipe_slow ); 4338 %} 4339 4340 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4341 instruct ReplL_imm(vec dst, immL con) %{ 4342 match(Set dst (ReplicateL con)); 4343 format %{ "replicateL $dst,$con" %} 4344 ins_encode %{ 4345 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this))); 4346 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4347 %} 4348 ins_pipe( pipe_slow ); 4349 %} 4350 4351 instruct ReplL_zero(vec dst, immL0 zero) %{ 4352 match(Set dst (ReplicateL zero)); 4353 format %{ "replicateL $dst,$zero" %} 4354 ins_encode %{ 4355 int vlen = Matcher::vector_length(this); 4356 if (vlen == 2) { 4357 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4358 } else { 4359 int vlen_enc = vector_length_encoding(this); 4360 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4361 } 4362 %} 4363 ins_pipe( fpu_reg_reg ); 4364 %} 4365 4366 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4367 predicate(UseAVX > 0); 4368 match(Set dst (ReplicateL con)); 4369 effect(TEMP dst); 4370 format %{ "vallones $dst" %} 4371 ins_encode %{ 4372 int vector_len = vector_length_encoding(this); 4373 __ vallones($dst$$XMMRegister, vector_len); 4374 %} 4375 ins_pipe( pipe_slow ); 4376 %} 4377 4378 // ====================ReplicateF======================================= 4379 4380 instruct ReplF_reg(vec dst, vlRegF src) %{ 4381 match(Set dst (ReplicateF src)); 4382 format %{ "replicateF $dst,$src" %} 4383 ins_encode %{ 4384 uint vlen = Matcher::vector_length(this); 4385 if (vlen <= 4) { 4386 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4387 } else if (VM_Version::supports_avx2()) { 4388 int vlen_enc = vector_length_encoding(this); 4389 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4390 } else { 4391 assert(vlen == 8, "sanity"); 4392 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4393 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4394 } 4395 %} 4396 ins_pipe( pipe_slow ); 4397 %} 4398 4399 instruct ReplF_mem(vec dst, memory mem) %{ 4400 match(Set dst (ReplicateF (LoadF mem))); 4401 format %{ "replicateF $dst,$mem" %} 4402 ins_encode %{ 4403 uint vlen = Matcher::vector_length(this); 4404 if (vlen <= 4) { 4405 __ movdl($dst$$XMMRegister, $mem$$Address); 4406 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4407 } else { 4408 assert(VM_Version::supports_avx(), "sanity"); 4409 int vlen_enc = vector_length_encoding(this); 4410 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4411 } 4412 %} 4413 ins_pipe( pipe_slow ); 4414 %} 4415 4416 // Replicate float scalar 
immediate to be vector by loading from const table. 4417 instruct ReplF_imm(vec dst, immF con) %{ 4418 match(Set dst (ReplicateF con)); 4419 format %{ "replicateF $dst,$con" %} 4420 ins_encode %{ 4421 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this))); 4422 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4423 %} 4424 ins_pipe( pipe_slow ); 4425 %} 4426 4427 instruct ReplF_zero(vec dst, immF0 zero) %{ 4428 match(Set dst (ReplicateF zero)); 4429 format %{ "replicateF $dst,$zero" %} 4430 ins_encode %{ 4431 uint vlen = Matcher::vector_length(this); 4432 if (vlen <= 4) { 4433 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4434 } else { 4435 int vlen_enc = vector_length_encoding(this); 4436 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4437 } 4438 %} 4439 ins_pipe( fpu_reg_reg ); 4440 %} 4441 4442 // ====================ReplicateD======================================= 4443 4444 // Replicate double (8 bytes) scalar to be vector 4445 instruct ReplD_reg(vec dst, vlRegD src) %{ 4446 match(Set dst (ReplicateD src)); 4447 format %{ "replicateD $dst,$src" %} 4448 ins_encode %{ 4449 uint vlen = Matcher::vector_length(this); 4450 if (vlen == 2) { 4451 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4452 } else if (VM_Version::supports_avx2()) { 4453 int vlen_enc = vector_length_encoding(this); 4454 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4455 } else { 4456 assert(vlen == 4, "sanity"); 4457 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4458 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4459 } 4460 %} 4461 ins_pipe( pipe_slow ); 4462 %} 4463 4464 instruct ReplD_mem(vec dst, memory mem) %{ 4465 match(Set dst (ReplicateD (LoadD mem))); 4466 format %{ "replicateD $dst,$mem" %} 4467 ins_encode %{ 4468 uint vlen = Matcher::vector_length(this); 4469 if (vlen == 2) { 4470 __ movq($dst$$XMMRegister, $mem$$Address); 4471 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 4472 } else { 4473 assert(VM_Version::supports_avx(), "sanity"); 4474 int vlen_enc = vector_length_encoding(this); 4475 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4476 } 4477 %} 4478 ins_pipe( pipe_slow ); 4479 %} 4480 4481 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
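// As with the integral Repl*_imm rules above, vreplicate_imm() packs the immediate
// into a constant-table entry sized for the whole vector and load_vector() pulls it
// into the destination register. Illustrative example (not from this file):
// replicating the double 1.0 into a 4-lane (256-bit) vector emits one 32-byte
// constant holding 0x3FF0000000000000 four times and a single 32-byte vector load.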
4482 instruct ReplD_imm(vec dst, immD con) %{ 4483 match(Set dst (ReplicateD con)); 4484 format %{ "replicateD $dst,$con" %} 4485 ins_encode %{ 4486 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this))); 4487 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4488 %} 4489 ins_pipe( pipe_slow ); 4490 %} 4491 4492 instruct ReplD_zero(vec dst, immD0 zero) %{ 4493 match(Set dst (ReplicateD zero)); 4494 format %{ "replicateD $dst,$zero" %} 4495 ins_encode %{ 4496 uint vlen = Matcher::vector_length(this); 4497 if (vlen == 2) { 4498 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4499 } else { 4500 int vlen_enc = vector_length_encoding(this); 4501 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4502 } 4503 %} 4504 ins_pipe( fpu_reg_reg ); 4505 %} 4506 4507 // ====================VECTOR INSERT======================================= 4508 4509 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4510 predicate(Matcher::vector_length_in_bytes(n) < 32); 4511 match(Set dst (VectorInsert (Binary dst val) idx)); 4512 format %{ "vector_insert $dst,$val,$idx" %} 4513 ins_encode %{ 4514 assert(UseSSE >= 4, "required"); 4515 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4516 4517 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4518 4519 assert(is_integral_type(elem_bt), ""); 4520 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4521 4522 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4523 %} 4524 ins_pipe( pipe_slow ); 4525 %} 4526 4527 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4528 predicate(Matcher::vector_length_in_bytes(n) == 32); 4529 match(Set dst (VectorInsert (Binary src val) idx)); 4530 effect(TEMP vtmp); 4531 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4532 ins_encode %{ 4533 int vlen_enc = Assembler::AVX_256bit; 4534 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4535 int elem_per_lane = 16/type2aelembytes(elem_bt); 4536 int log2epr = log2(elem_per_lane); 4537 4538 assert(is_integral_type(elem_bt), "sanity"); 4539 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4540 4541 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4542 uint y_idx = ($idx$$constant >> log2epr) & 1; 4543 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4544 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4545 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4546 %} 4547 ins_pipe( pipe_slow ); 4548 %} 4549 4550 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4551 predicate(Matcher::vector_length_in_bytes(n) == 64); 4552 match(Set dst (VectorInsert (Binary src val) idx)); 4553 effect(TEMP vtmp); 4554 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4555 ins_encode %{ 4556 assert(UseAVX > 2, "sanity"); 4557 4558 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4559 int elem_per_lane = 16/type2aelembytes(elem_bt); 4560 int log2epr = log2(elem_per_lane); 4561 4562 assert(is_integral_type(elem_bt), ""); 4563 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4564 4565 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4566 uint y_idx = ($idx$$constant >> log2epr) & 3; 4567 __ vextracti32x4($vtmp$$XMMRegister, 
$src$$XMMRegister, y_idx); 4568 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4569 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4570 %} 4571 ins_pipe( pipe_slow ); 4572 %} 4573 4574 #ifdef _LP64 4575 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4576 predicate(Matcher::vector_length(n) == 2); 4577 match(Set dst (VectorInsert (Binary dst val) idx)); 4578 format %{ "vector_insert $dst,$val,$idx" %} 4579 ins_encode %{ 4580 assert(UseSSE >= 4, "required"); 4581 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4582 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4583 4584 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4585 %} 4586 ins_pipe( pipe_slow ); 4587 %} 4588 4589 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4590 predicate(Matcher::vector_length(n) == 4); 4591 match(Set dst (VectorInsert (Binary src val) idx)); 4592 effect(TEMP vtmp); 4593 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4594 ins_encode %{ 4595 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4596 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4597 4598 uint x_idx = $idx$$constant & right_n_bits(1); 4599 uint y_idx = ($idx$$constant >> 1) & 1; 4600 int vlen_enc = Assembler::AVX_256bit; 4601 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4602 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4603 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4604 %} 4605 ins_pipe( pipe_slow ); 4606 %} 4607 4608 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4609 predicate(Matcher::vector_length(n) == 8); 4610 match(Set dst (VectorInsert (Binary src val) idx)); 4611 effect(TEMP vtmp); 4612 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4613 ins_encode %{ 4614 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4615 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4616 4617 uint x_idx = $idx$$constant & right_n_bits(1); 4618 uint y_idx = ($idx$$constant >> 1) & 3; 4619 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4620 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4621 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 #endif 4626 4627 instruct insertF(vec dst, regF val, immU8 idx) %{ 4628 predicate(Matcher::vector_length(n) < 8); 4629 match(Set dst (VectorInsert (Binary dst val) idx)); 4630 format %{ "vector_insert $dst,$val,$idx" %} 4631 ins_encode %{ 4632 assert(UseSSE >= 4, "sanity"); 4633 4634 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4635 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4636 4637 uint x_idx = $idx$$constant & right_n_bits(2); 4638 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4639 %} 4640 ins_pipe( pipe_slow ); 4641 %} 4642 4643 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4644 predicate(Matcher::vector_length(n) >= 8); 4645 match(Set dst (VectorInsert (Binary src val) idx)); 4646 effect(TEMP vtmp); 4647 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4648 ins_encode %{ 4649 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4650 assert($idx$$constant < 
(int)Matcher::vector_length(this), "out of bounds"); 4651 4652 int vlen = Matcher::vector_length(this); 4653 uint x_idx = $idx$$constant & right_n_bits(2); 4654 if (vlen == 8) { 4655 uint y_idx = ($idx$$constant >> 2) & 1; 4656 int vlen_enc = Assembler::AVX_256bit; 4657 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4658 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4659 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4660 } else { 4661 assert(vlen == 16, "sanity"); 4662 uint y_idx = ($idx$$constant >> 2) & 3; 4663 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4664 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4665 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4666 } 4667 %} 4668 ins_pipe( pipe_slow ); 4669 %} 4670 4671 #ifdef _LP64 4672 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4673 predicate(Matcher::vector_length(n) == 2); 4674 match(Set dst (VectorInsert (Binary dst val) idx)); 4675 effect(TEMP tmp); 4676 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4677 ins_encode %{ 4678 assert(UseSSE >= 4, "sanity"); 4679 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4680 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4681 4682 __ movq($tmp$$Register, $val$$XMMRegister); 4683 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4684 %} 4685 ins_pipe( pipe_slow ); 4686 %} 4687 4688 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4689 predicate(Matcher::vector_length(n) == 4); 4690 match(Set dst (VectorInsert (Binary src val) idx)); 4691 effect(TEMP vtmp, TEMP tmp); 4692 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4693 ins_encode %{ 4694 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4695 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4696 4697 uint x_idx = $idx$$constant & right_n_bits(1); 4698 uint y_idx = ($idx$$constant >> 1) & 1; 4699 int vlen_enc = Assembler::AVX_256bit; 4700 __ movq($tmp$$Register, $val$$XMMRegister); 4701 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4702 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4703 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4704 %} 4705 ins_pipe( pipe_slow ); 4706 %} 4707 4708 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4709 predicate(Matcher::vector_length(n) == 8); 4710 match(Set dst (VectorInsert (Binary src val) idx)); 4711 effect(TEMP tmp, TEMP vtmp); 4712 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4713 ins_encode %{ 4714 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4715 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4716 4717 uint x_idx = $idx$$constant & right_n_bits(1); 4718 uint y_idx = ($idx$$constant >> 1) & 3; 4719 __ movq($tmp$$Register, $val$$XMMRegister); 4720 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4721 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4722 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4723 %} 4724 ins_pipe( pipe_slow ); 4725 %} 4726 #endif 4727 4728 // ====================REDUCTION ARITHMETIC======================================= 
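//
// The reduction rules in this section fold a vector operand into a single scalar: the
// scalar input seeds an accumulator, which is then combined with every lane of the vector
// input using the node's operation. Scalar-equivalent sketch (illustrative only; the
// function below is not part of HotSpot):
//
//   static int add_reduction_int_sketch(int src1, const int* src2, int vlen) {
//     int acc = src1;                  // scalar input seeds the accumulator
//     for (int i = 0; i < vlen; i++) {
//       acc += src2[i];                // AddReductionVI; Min/Max/And/Or/Xor/Mul fold the same way
//     }
//     return acc;                      // single scalar result ends up in dst
//   }
//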
4729 4730 // =======================Int Reduction========================================== 4731 4732 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4733 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 4734 match(Set dst (AddReductionVI src1 src2)); 4735 match(Set dst (MulReductionVI src1 src2)); 4736 match(Set dst (AndReductionV src1 src2)); 4737 match(Set dst ( OrReductionV src1 src2)); 4738 match(Set dst (XorReductionV src1 src2)); 4739 match(Set dst (MinReductionV src1 src2)); 4740 match(Set dst (MaxReductionV src1 src2)); 4741 effect(TEMP vtmp1, TEMP vtmp2); 4742 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4743 ins_encode %{ 4744 int opcode = this->ideal_Opcode(); 4745 int vlen = Matcher::vector_length(this, $src2); 4746 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4747 %} 4748 ins_pipe( pipe_slow ); 4749 %} 4750 4751 // =======================Long Reduction========================================== 4752 4753 #ifdef _LP64 4754 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4755 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4756 match(Set dst (AddReductionVL src1 src2)); 4757 match(Set dst (MulReductionVL src1 src2)); 4758 match(Set dst (AndReductionV src1 src2)); 4759 match(Set dst ( OrReductionV src1 src2)); 4760 match(Set dst (XorReductionV src1 src2)); 4761 match(Set dst (MinReductionV src1 src2)); 4762 match(Set dst (MaxReductionV src1 src2)); 4763 effect(TEMP vtmp1, TEMP vtmp2); 4764 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4765 ins_encode %{ 4766 int opcode = this->ideal_Opcode(); 4767 int vlen = Matcher::vector_length(this, $src2); 4768 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4769 %} 4770 ins_pipe( pipe_slow ); 4771 %} 4772 4773 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4774 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4775 match(Set dst (AddReductionVL src1 src2)); 4776 match(Set dst (MulReductionVL src1 src2)); 4777 match(Set dst (AndReductionV src1 src2)); 4778 match(Set dst ( OrReductionV src1 src2)); 4779 match(Set dst (XorReductionV src1 src2)); 4780 match(Set dst (MinReductionV src1 src2)); 4781 match(Set dst (MaxReductionV src1 src2)); 4782 effect(TEMP vtmp1, TEMP vtmp2); 4783 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4784 ins_encode %{ 4785 int opcode = this->ideal_Opcode(); 4786 int vlen = Matcher::vector_length(this, $src2); 4787 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4788 %} 4789 ins_pipe( pipe_slow ); 4790 %} 4791 #endif // _LP64 4792 4793 // =======================Float Reduction========================================== 4794 4795 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4796 predicate(Matcher::vector_length(n->in(2)) <= 4); // src 4797 match(Set dst (AddReductionVF dst src)); 4798 match(Set dst (MulReductionVF dst src)); 4799 effect(TEMP dst, TEMP vtmp); 4800 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4801 ins_encode %{ 4802 int opcode = this->ideal_Opcode(); 4803 int vlen = Matcher::vector_length(this, 
$src); 4804 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4805 %} 4806 ins_pipe( pipe_slow ); 4807 %} 4808 4809 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4810 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4811 match(Set dst (AddReductionVF dst src)); 4812 match(Set dst (MulReductionVF dst src)); 4813 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4814 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4815 ins_encode %{ 4816 int opcode = this->ideal_Opcode(); 4817 int vlen = Matcher::vector_length(this, $src); 4818 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4819 %} 4820 ins_pipe( pipe_slow ); 4821 %} 4822 4823 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4824 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4825 match(Set dst (AddReductionVF dst src)); 4826 match(Set dst (MulReductionVF dst src)); 4827 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4828 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4829 ins_encode %{ 4830 int opcode = this->ideal_Opcode(); 4831 int vlen = Matcher::vector_length(this, $src); 4832 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4833 %} 4834 ins_pipe( pipe_slow ); 4835 %} 4836 4837 // =======================Double Reduction========================================== 4838 4839 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4840 predicate(Matcher::vector_length(n->in(2)) == 2); // src 4841 match(Set dst (AddReductionVD dst src)); 4842 match(Set dst (MulReductionVD dst src)); 4843 effect(TEMP dst, TEMP vtmp); 4844 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4845 ins_encode %{ 4846 int opcode = this->ideal_Opcode(); 4847 int vlen = Matcher::vector_length(this, $src); 4848 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4849 %} 4850 ins_pipe( pipe_slow ); 4851 %} 4852 4853 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4854 predicate(Matcher::vector_length(n->in(2)) == 4); // src 4855 match(Set dst (AddReductionVD dst src)); 4856 match(Set dst (MulReductionVD dst src)); 4857 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4858 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4859 ins_encode %{ 4860 int opcode = this->ideal_Opcode(); 4861 int vlen = Matcher::vector_length(this, $src); 4862 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4863 %} 4864 ins_pipe( pipe_slow ); 4865 %} 4866 4867 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4868 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4869 match(Set dst (AddReductionVD dst src)); 4870 match(Set dst (MulReductionVD dst src)); 4871 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4872 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4873 ins_encode %{ 4874 int opcode = this->ideal_Opcode(); 4875 int vlen = Matcher::vector_length(this, $src); 4876 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4877 %} 4878 ins_pipe( pipe_slow ); 4879 %} 4880 4881 // =======================Byte Reduction========================================== 4882 4883 #ifdef _LP64 4884 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 
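  // Byte reductions are split on AVX512BW support: this variant uses the legVec register
  // classes and is selected when AVX512BW is absent, while reductionB_avx512bw below covers
  // BW-capable CPUs. MulReductionVI for T_BYTE is not matched here; the dedicated
  // mul_reductionB rules in the Mul Reduction section handle it.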
4885 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 4886 match(Set dst (AddReductionVI src1 src2)); 4887 match(Set dst (AndReductionV src1 src2)); 4888 match(Set dst ( OrReductionV src1 src2)); 4889 match(Set dst (XorReductionV src1 src2)); 4890 match(Set dst (MinReductionV src1 src2)); 4891 match(Set dst (MaxReductionV src1 src2)); 4892 effect(TEMP vtmp1, TEMP vtmp2); 4893 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4894 ins_encode %{ 4895 int opcode = this->ideal_Opcode(); 4896 int vlen = Matcher::vector_length(this, $src2); 4897 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4898 %} 4899 ins_pipe( pipe_slow ); 4900 %} 4901 4902 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4903 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 4904 match(Set dst (AddReductionVI src1 src2)); 4905 match(Set dst (AndReductionV src1 src2)); 4906 match(Set dst ( OrReductionV src1 src2)); 4907 match(Set dst (XorReductionV src1 src2)); 4908 match(Set dst (MinReductionV src1 src2)); 4909 match(Set dst (MaxReductionV src1 src2)); 4910 effect(TEMP vtmp1, TEMP vtmp2); 4911 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4912 ins_encode %{ 4913 int opcode = this->ideal_Opcode(); 4914 int vlen = Matcher::vector_length(this, $src2); 4915 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4916 %} 4917 ins_pipe( pipe_slow ); 4918 %} 4919 #endif 4920 4921 // =======================Short Reduction========================================== 4922 4923 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4924 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 4925 match(Set dst (AddReductionVI src1 src2)); 4926 match(Set dst (MulReductionVI src1 src2)); 4927 match(Set dst (AndReductionV src1 src2)); 4928 match(Set dst ( OrReductionV src1 src2)); 4929 match(Set dst (XorReductionV src1 src2)); 4930 match(Set dst (MinReductionV src1 src2)); 4931 match(Set dst (MaxReductionV src1 src2)); 4932 effect(TEMP vtmp1, TEMP vtmp2); 4933 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4934 ins_encode %{ 4935 int opcode = this->ideal_Opcode(); 4936 int vlen = Matcher::vector_length(this, $src2); 4937 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4938 %} 4939 ins_pipe( pipe_slow ); 4940 %} 4941 4942 // =======================Mul Reduction========================================== 4943 4944 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4945 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4946 Matcher::vector_length(n->in(2)) <= 32); // src2 4947 match(Set dst (MulReductionVI src1 src2)); 4948 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4949 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4950 ins_encode %{ 4951 int opcode = this->ideal_Opcode(); 4952 int vlen = Matcher::vector_length(this, $src2); 4953 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4954 %} 4955 ins_pipe( pipe_slow ); 4956 %} 4957 4958 instruct mul_reduction64B(rRegI dst, 
rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4959 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4960 Matcher::vector_length(n->in(2)) == 64); // src2 4961 match(Set dst (MulReductionVI src1 src2)); 4962 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4963 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4964 ins_encode %{ 4965 int opcode = this->ideal_Opcode(); 4966 int vlen = Matcher::vector_length(this, $src2); 4967 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4968 %} 4969 ins_pipe( pipe_slow ); 4970 %} 4971 4972 //--------------------Min/Max Float Reduction -------------------- 4973 // Float Min Reduction 4974 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 4975 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4976 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4977 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4978 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4979 Matcher::vector_length(n->in(2)) == 2); 4980 match(Set dst (MinReductionV src1 src2)); 4981 match(Set dst (MaxReductionV src1 src2)); 4982 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4983 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4984 ins_encode %{ 4985 assert(UseAVX > 0, "sanity"); 4986 4987 int opcode = this->ideal_Opcode(); 4988 int vlen = Matcher::vector_length(this, $src2); 4989 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4990 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4991 %} 4992 ins_pipe( pipe_slow ); 4993 %} 4994 4995 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 4996 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4997 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4998 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4999 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5000 Matcher::vector_length(n->in(2)) >= 4); 5001 match(Set dst (MinReductionV src1 src2)); 5002 match(Set dst (MaxReductionV src1 src2)); 5003 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5004 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5005 ins_encode %{ 5006 assert(UseAVX > 0, "sanity"); 5007 5008 int opcode = this->ideal_Opcode(); 5009 int vlen = Matcher::vector_length(this, $src2); 5010 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5011 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5012 %} 5013 ins_pipe( pipe_slow ); 5014 %} 5015 5016 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5017 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5018 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5019 Matcher::vector_length(n->in(2)) == 2); 5020 match(Set dst (MinReductionV dst src)); 5021 match(Set dst (MaxReductionV dst src)); 5022 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5023 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5024 ins_encode %{ 5025 assert(UseAVX > 0, 
"sanity"); 5026 5027 int opcode = this->ideal_Opcode(); 5028 int vlen = Matcher::vector_length(this, $src); 5029 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5030 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5031 %} 5032 ins_pipe( pipe_slow ); 5033 %} 5034 5035 5036 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5037 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5038 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5039 Matcher::vector_length(n->in(2)) >= 4); 5040 match(Set dst (MinReductionV dst src)); 5041 match(Set dst (MaxReductionV dst src)); 5042 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5043 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5044 ins_encode %{ 5045 assert(UseAVX > 0, "sanity"); 5046 5047 int opcode = this->ideal_Opcode(); 5048 int vlen = Matcher::vector_length(this, $src); 5049 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5050 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5051 %} 5052 ins_pipe( pipe_slow ); 5053 %} 5054 5055 5056 //--------------------Min Double Reduction -------------------- 5057 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5058 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5059 rFlagsReg cr) %{ 5060 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5061 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5062 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5063 Matcher::vector_length(n->in(2)) == 2); 5064 match(Set dst (MinReductionV src1 src2)); 5065 match(Set dst (MaxReductionV src1 src2)); 5066 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5067 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5068 ins_encode %{ 5069 assert(UseAVX > 0, "sanity"); 5070 5071 int opcode = this->ideal_Opcode(); 5072 int vlen = Matcher::vector_length(this, $src2); 5073 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5074 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5075 %} 5076 ins_pipe( pipe_slow ); 5077 %} 5078 5079 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5080 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5081 rFlagsReg cr) %{ 5082 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5083 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5084 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5085 Matcher::vector_length(n->in(2)) >= 4); 5086 match(Set dst (MinReductionV src1 src2)); 5087 match(Set dst (MaxReductionV src1 src2)); 5088 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5089 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5090 ins_encode %{ 5091 assert(UseAVX > 0, "sanity"); 5092 5093 int opcode = this->ideal_Opcode(); 5094 int vlen = Matcher::vector_length(this, $src2); 5095 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5096 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, 
$tmp5$$XMMRegister); 5097 %} 5098 ins_pipe( pipe_slow ); 5099 %} 5100 5101 5102 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5103 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5104 rFlagsReg cr) %{ 5105 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5106 Matcher::vector_length(n->in(2)) == 2); 5107 match(Set dst (MinReductionV dst src)); 5108 match(Set dst (MaxReductionV dst src)); 5109 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5110 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5111 ins_encode %{ 5112 assert(UseAVX > 0, "sanity"); 5113 5114 int opcode = this->ideal_Opcode(); 5115 int vlen = Matcher::vector_length(this, $src); 5116 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5117 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5118 %} 5119 ins_pipe( pipe_slow ); 5120 %} 5121 5122 instruct minmax_reductionD_av(legRegD dst, legVec src, 5123 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5124 rFlagsReg cr) %{ 5125 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5126 Matcher::vector_length(n->in(2)) >= 4); 5127 match(Set dst (MinReductionV dst src)); 5128 match(Set dst (MaxReductionV dst src)); 5129 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5130 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5131 ins_encode %{ 5132 assert(UseAVX > 0, "sanity"); 5133 5134 int opcode = this->ideal_Opcode(); 5135 int vlen = Matcher::vector_length(this, $src); 5136 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5137 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5138 %} 5139 ins_pipe( pipe_slow ); 5140 %} 5141 5142 // ====================VECTOR ARITHMETIC======================================= 5143 5144 // --------------------------------- ADD -------------------------------------- 5145 5146 // Bytes vector add 5147 instruct vaddB(vec dst, vec src) %{ 5148 predicate(UseAVX == 0); 5149 match(Set dst (AddVB dst src)); 5150 format %{ "paddb $dst,$src\t! add packedB" %} 5151 ins_encode %{ 5152 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5153 %} 5154 ins_pipe( pipe_slow ); 5155 %} 5156 5157 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5158 predicate(UseAVX > 0); 5159 match(Set dst (AddVB src1 src2)); 5160 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5161 ins_encode %{ 5162 int vlen_enc = vector_length_encoding(this); 5163 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5164 %} 5165 ins_pipe( pipe_slow ); 5166 %} 5167 5168 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5169 predicate((UseAVX > 0) && 5170 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5171 match(Set dst (AddVB src (LoadVector mem))); 5172 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5173 ins_encode %{ 5174 int vlen_enc = vector_length_encoding(this); 5175 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5176 %} 5177 ins_pipe( pipe_slow ); 5178 %} 5179 5180 // Shorts/Chars vector add 5181 instruct vaddS(vec dst, vec src) %{ 5182 predicate(UseAVX == 0); 5183 match(Set dst (AddVS dst src)); 5184 format %{ "paddw $dst,$src\t! 
add packedS" %} 5185 ins_encode %{ 5186 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5187 %} 5188 ins_pipe( pipe_slow ); 5189 %} 5190 5191 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5192 predicate(UseAVX > 0); 5193 match(Set dst (AddVS src1 src2)); 5194 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5195 ins_encode %{ 5196 int vlen_enc = vector_length_encoding(this); 5197 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5198 %} 5199 ins_pipe( pipe_slow ); 5200 %} 5201 5202 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5203 predicate((UseAVX > 0) && 5204 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5205 match(Set dst (AddVS src (LoadVector mem))); 5206 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5207 ins_encode %{ 5208 int vlen_enc = vector_length_encoding(this); 5209 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5210 %} 5211 ins_pipe( pipe_slow ); 5212 %} 5213 5214 // Integers vector add 5215 instruct vaddI(vec dst, vec src) %{ 5216 predicate(UseAVX == 0); 5217 match(Set dst (AddVI dst src)); 5218 format %{ "paddd $dst,$src\t! add packedI" %} 5219 ins_encode %{ 5220 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5221 %} 5222 ins_pipe( pipe_slow ); 5223 %} 5224 5225 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5226 predicate(UseAVX > 0); 5227 match(Set dst (AddVI src1 src2)); 5228 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5229 ins_encode %{ 5230 int vlen_enc = vector_length_encoding(this); 5231 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5232 %} 5233 ins_pipe( pipe_slow ); 5234 %} 5235 5236 5237 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5238 predicate((UseAVX > 0) && 5239 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5240 match(Set dst (AddVI src (LoadVector mem))); 5241 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5242 ins_encode %{ 5243 int vlen_enc = vector_length_encoding(this); 5244 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5245 %} 5246 ins_pipe( pipe_slow ); 5247 %} 5248 5249 // Longs vector add 5250 instruct vaddL(vec dst, vec src) %{ 5251 predicate(UseAVX == 0); 5252 match(Set dst (AddVL dst src)); 5253 format %{ "paddq $dst,$src\t! add packedL" %} 5254 ins_encode %{ 5255 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5256 %} 5257 ins_pipe( pipe_slow ); 5258 %} 5259 5260 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5261 predicate(UseAVX > 0); 5262 match(Set dst (AddVL src1 src2)); 5263 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5264 ins_encode %{ 5265 int vlen_enc = vector_length_encoding(this); 5266 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5267 %} 5268 ins_pipe( pipe_slow ); 5269 %} 5270 5271 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5272 predicate((UseAVX > 0) && 5273 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5274 match(Set dst (AddVL src (LoadVector mem))); 5275 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5276 ins_encode %{ 5277 int vlen_enc = vector_length_encoding(this); 5278 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5279 %} 5280 ins_pipe( pipe_slow ); 5281 %} 5282 5283 // Floats vector add 5284 instruct vaddF(vec dst, vec src) %{ 5285 predicate(UseAVX == 0); 5286 match(Set dst (AddVF dst src)); 5287 format %{ "addps $dst,$src\t! 
add packedF" %} 5288 ins_encode %{ 5289 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5290 %} 5291 ins_pipe( pipe_slow ); 5292 %} 5293 5294 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5295 predicate(UseAVX > 0); 5296 match(Set dst (AddVF src1 src2)); 5297 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5298 ins_encode %{ 5299 int vlen_enc = vector_length_encoding(this); 5300 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5301 %} 5302 ins_pipe( pipe_slow ); 5303 %} 5304 5305 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5306 predicate((UseAVX > 0) && 5307 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5308 match(Set dst (AddVF src (LoadVector mem))); 5309 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5310 ins_encode %{ 5311 int vlen_enc = vector_length_encoding(this); 5312 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5313 %} 5314 ins_pipe( pipe_slow ); 5315 %} 5316 5317 // Doubles vector add 5318 instruct vaddD(vec dst, vec src) %{ 5319 predicate(UseAVX == 0); 5320 match(Set dst (AddVD dst src)); 5321 format %{ "addpd $dst,$src\t! add packedD" %} 5322 ins_encode %{ 5323 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5324 %} 5325 ins_pipe( pipe_slow ); 5326 %} 5327 5328 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5329 predicate(UseAVX > 0); 5330 match(Set dst (AddVD src1 src2)); 5331 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5332 ins_encode %{ 5333 int vlen_enc = vector_length_encoding(this); 5334 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5335 %} 5336 ins_pipe( pipe_slow ); 5337 %} 5338 5339 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5340 predicate((UseAVX > 0) && 5341 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5342 match(Set dst (AddVD src (LoadVector mem))); 5343 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5344 ins_encode %{ 5345 int vlen_enc = vector_length_encoding(this); 5346 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5347 %} 5348 ins_pipe( pipe_slow ); 5349 %} 5350 5351 // --------------------------------- SUB -------------------------------------- 5352 5353 // Bytes vector sub 5354 instruct vsubB(vec dst, vec src) %{ 5355 predicate(UseAVX == 0); 5356 match(Set dst (SubVB dst src)); 5357 format %{ "psubb $dst,$src\t! sub packedB" %} 5358 ins_encode %{ 5359 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5360 %} 5361 ins_pipe( pipe_slow ); 5362 %} 5363 5364 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5365 predicate(UseAVX > 0); 5366 match(Set dst (SubVB src1 src2)); 5367 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5368 ins_encode %{ 5369 int vlen_enc = vector_length_encoding(this); 5370 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5371 %} 5372 ins_pipe( pipe_slow ); 5373 %} 5374 5375 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5376 predicate((UseAVX > 0) && 5377 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5378 match(Set dst (SubVB src (LoadVector mem))); 5379 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5380 ins_encode %{ 5381 int vlen_enc = vector_length_encoding(this); 5382 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5383 %} 5384 ins_pipe( pipe_slow ); 5385 %} 5386 5387 // Shorts/Chars vector sub 5388 instruct vsubS(vec dst, vec src) %{ 5389 predicate(UseAVX == 0); 5390 match(Set dst (SubVS dst src)); 5391 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5392 ins_encode %{ 5393 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5394 %} 5395 ins_pipe( pipe_slow ); 5396 %} 5397 5398 5399 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5400 predicate(UseAVX > 0); 5401 match(Set dst (SubVS src1 src2)); 5402 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5403 ins_encode %{ 5404 int vlen_enc = vector_length_encoding(this); 5405 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5406 %} 5407 ins_pipe( pipe_slow ); 5408 %} 5409 5410 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5411 predicate((UseAVX > 0) && 5412 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5413 match(Set dst (SubVS src (LoadVector mem))); 5414 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5415 ins_encode %{ 5416 int vlen_enc = vector_length_encoding(this); 5417 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5418 %} 5419 ins_pipe( pipe_slow ); 5420 %} 5421 5422 // Integers vector sub 5423 instruct vsubI(vec dst, vec src) %{ 5424 predicate(UseAVX == 0); 5425 match(Set dst (SubVI dst src)); 5426 format %{ "psubd $dst,$src\t! sub packedI" %} 5427 ins_encode %{ 5428 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5429 %} 5430 ins_pipe( pipe_slow ); 5431 %} 5432 5433 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5434 predicate(UseAVX > 0); 5435 match(Set dst (SubVI src1 src2)); 5436 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5437 ins_encode %{ 5438 int vlen_enc = vector_length_encoding(this); 5439 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5440 %} 5441 ins_pipe( pipe_slow ); 5442 %} 5443 5444 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5445 predicate((UseAVX > 0) && 5446 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5447 match(Set dst (SubVI src (LoadVector mem))); 5448 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5449 ins_encode %{ 5450 int vlen_enc = vector_length_encoding(this); 5451 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5452 %} 5453 ins_pipe( pipe_slow ); 5454 %} 5455 5456 // Longs vector sub 5457 instruct vsubL(vec dst, vec src) %{ 5458 predicate(UseAVX == 0); 5459 match(Set dst (SubVL dst src)); 5460 format %{ "psubq $dst,$src\t! sub packedL" %} 5461 ins_encode %{ 5462 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5463 %} 5464 ins_pipe( pipe_slow ); 5465 %} 5466 5467 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5468 predicate(UseAVX > 0); 5469 match(Set dst (SubVL src1 src2)); 5470 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5471 ins_encode %{ 5472 int vlen_enc = vector_length_encoding(this); 5473 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5474 %} 5475 ins_pipe( pipe_slow ); 5476 %} 5477 5478 5479 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5480 predicate((UseAVX > 0) && 5481 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5482 match(Set dst (SubVL src (LoadVector mem))); 5483 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5484 ins_encode %{ 5485 int vlen_enc = vector_length_encoding(this); 5486 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5487 %} 5488 ins_pipe( pipe_slow ); 5489 %} 5490 5491 // Floats vector sub 5492 instruct vsubF(vec dst, vec src) %{ 5493 predicate(UseAVX == 0); 5494 match(Set dst (SubVF dst src)); 5495 format %{ "subps $dst,$src\t! 
sub packedF" %} 5496 ins_encode %{ 5497 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5498 %} 5499 ins_pipe( pipe_slow ); 5500 %} 5501 5502 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5503 predicate(UseAVX > 0); 5504 match(Set dst (SubVF src1 src2)); 5505 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5506 ins_encode %{ 5507 int vlen_enc = vector_length_encoding(this); 5508 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5509 %} 5510 ins_pipe( pipe_slow ); 5511 %} 5512 5513 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5514 predicate((UseAVX > 0) && 5515 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5516 match(Set dst (SubVF src (LoadVector mem))); 5517 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5518 ins_encode %{ 5519 int vlen_enc = vector_length_encoding(this); 5520 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5521 %} 5522 ins_pipe( pipe_slow ); 5523 %} 5524 5525 // Doubles vector sub 5526 instruct vsubD(vec dst, vec src) %{ 5527 predicate(UseAVX == 0); 5528 match(Set dst (SubVD dst src)); 5529 format %{ "subpd $dst,$src\t! sub packedD" %} 5530 ins_encode %{ 5531 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5532 %} 5533 ins_pipe( pipe_slow ); 5534 %} 5535 5536 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5537 predicate(UseAVX > 0); 5538 match(Set dst (SubVD src1 src2)); 5539 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5540 ins_encode %{ 5541 int vlen_enc = vector_length_encoding(this); 5542 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5543 %} 5544 ins_pipe( pipe_slow ); 5545 %} 5546 5547 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5548 predicate((UseAVX > 0) && 5549 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5550 match(Set dst (SubVD src (LoadVector mem))); 5551 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 5552 ins_encode %{ 5553 int vlen_enc = vector_length_encoding(this); 5554 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5555 %} 5556 ins_pipe( pipe_slow ); 5557 %} 5558 5559 // --------------------------------- MUL -------------------------------------- 5560 5561 // Byte vector mul 5562 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5563 predicate(Matcher::vector_length(n) == 4 || 5564 Matcher::vector_length(n) == 8); 5565 match(Set dst (MulVB src1 src2)); 5566 effect(TEMP dst, TEMP tmp, TEMP scratch); 5567 format %{"vector_mulB $dst,$src1,$src2" %} 5568 ins_encode %{ 5569 assert(UseSSE > 3, "required"); 5570 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 5571 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 5572 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 5573 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5574 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5575 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5576 %} 5577 ins_pipe( pipe_slow ); 5578 %} 5579 5580 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5581 predicate(Matcher::vector_length(n) == 16 && UseAVX <= 1); 5582 match(Set dst (MulVB src1 src2)); 5583 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5584 format %{"vector_mulB $dst,$src1,$src2" %} 5585 ins_encode %{ 5586 assert(UseSSE > 3, "required"); 5587 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 5588 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 5589 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 5590 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 5591 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 5592 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5593 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 5594 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 5595 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5596 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5597 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5598 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5599 %} 5600 ins_pipe( pipe_slow ); 5601 %} 5602 5603 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5604 predicate(Matcher::vector_length(n) == 16 && UseAVX > 1); 5605 match(Set dst (MulVB src1 src2)); 5606 effect(TEMP dst, TEMP tmp, TEMP scratch); 5607 format %{"vector_mulB $dst,$src1,$src2" %} 5608 ins_encode %{ 5609 int vlen_enc = Assembler::AVX_256bit; 5610 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5611 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5612 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5613 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5614 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5615 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 5616 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 5617 %} 5618 ins_pipe( pipe_slow ); 5619 %} 5620 5621 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5622 predicate(Matcher::vector_length(n) == 32); 5623 match(Set dst (MulVB src1 src2)); 5624 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5625 format %{"vector_mulB $dst,$src1,$src2" %} 5626 ins_encode %{ 5627 assert(UseAVX > 1, "required"); 5628 int vlen_enc = 
Assembler::AVX_256bit; 5629 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5630 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 5631 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5632 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5633 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5634 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5635 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5636 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5637 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5638 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5639 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5640 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5641 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5642 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5643 %} 5644 ins_pipe( pipe_slow ); 5645 %} 5646 5647 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5648 predicate(Matcher::vector_length(n) == 64); 5649 match(Set dst (MulVB src1 src2)); 5650 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5651 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 5652 ins_encode %{ 5653 assert(UseAVX > 2, "required"); 5654 int vlen_enc = Assembler::AVX_512bit; 5655 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5656 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 5657 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5658 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5659 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5660 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5661 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5662 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5663 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5664 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5665 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5666 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5667 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5668 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5669 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5670 %} 5671 ins_pipe( pipe_slow ); 5672 %} 5673 5674 // Shorts/Chars vector mul 5675 instruct vmulS(vec dst, vec src) %{ 5676 predicate(UseAVX == 0); 5677 match(Set dst (MulVS dst src)); 5678 format %{ "pmullw $dst,$src\t! mul packedS" %} 5679 ins_encode %{ 5680 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5681 %} 5682 ins_pipe( pipe_slow ); 5683 %} 5684 5685 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5686 predicate(UseAVX > 0); 5687 match(Set dst (MulVS src1 src2)); 5688 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 5689 ins_encode %{ 5690 int vlen_enc = vector_length_encoding(this); 5691 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5692 %} 5693 ins_pipe( pipe_slow ); 5694 %} 5695 5696 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5697 predicate((UseAVX > 0) && 5698 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5699 match(Set dst (MulVS src (LoadVector mem))); 5700 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5701 ins_encode %{ 5702 int vlen_enc = vector_length_encoding(this); 5703 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5704 %} 5705 ins_pipe( pipe_slow ); 5706 %} 5707 5708 // Integers vector mul 5709 instruct vmulI(vec dst, vec src) %{ 5710 predicate(UseAVX == 0); 5711 match(Set dst (MulVI dst src)); 5712 format %{ "pmulld $dst,$src\t! mul packedI" %} 5713 ins_encode %{ 5714 assert(UseSSE > 3, "required"); 5715 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5716 %} 5717 ins_pipe( pipe_slow ); 5718 %} 5719 5720 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5721 predicate(UseAVX > 0); 5722 match(Set dst (MulVI src1 src2)); 5723 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5724 ins_encode %{ 5725 int vlen_enc = vector_length_encoding(this); 5726 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5727 %} 5728 ins_pipe( pipe_slow ); 5729 %} 5730 5731 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5732 predicate((UseAVX > 0) && 5733 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5734 match(Set dst (MulVI src (LoadVector mem))); 5735 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5736 ins_encode %{ 5737 int vlen_enc = vector_length_encoding(this); 5738 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5739 %} 5740 ins_pipe( pipe_slow ); 5741 %} 5742 5743 // Longs vector mul 5744 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 5745 predicate(VM_Version::supports_avx512dq()); 5746 match(Set dst (MulVL src1 src2)); 5747 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 5748 ins_encode %{ 5749 assert(UseAVX > 2, "required"); 5750 int vlen_enc = vector_length_encoding(this); 5751 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5752 %} 5753 ins_pipe( pipe_slow ); 5754 %} 5755 5756 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 5757 predicate(VM_Version::supports_avx512dq() && 5758 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5759 match(Set dst (MulVL src (LoadVector mem))); 5760 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 5761 ins_encode %{ 5762 assert(UseAVX > 2, "required"); 5763 int vlen_enc = vector_length_encoding(this); 5764 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5765 %} 5766 ins_pipe( pipe_slow ); 5767 %} 5768 5769 instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ 5770 predicate(Matcher::vector_length(n) == 2 && !VM_Version::supports_avx512dq()); 5771 match(Set dst (MulVL dst src2)); 5772 effect(TEMP dst, TEMP tmp); 5773 format %{ "pshufd $tmp,$src2, 177\n\t" 5774 "pmulld $tmp,$dst\n\t" 5775 "phaddd $tmp,$tmp\n\t" 5776 "pmovzxdq $tmp,$tmp\n\t" 5777 "psllq $tmp, 32\n\t" 5778 "pmuludq $dst,$src2\n\t" 5779 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 5780 5781 ins_encode %{ 5782 assert(VM_Version::supports_sse4_1(), "required"); 5783 int vlen_enc = Assembler::AVX_128bit; 5784 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 5785 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 5786 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5787 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 5788 __ psllq($tmp$$XMMRegister, 32); 5789 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 5790 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ 5796 predicate(Matcher::vector_length(n) == 4 && !VM_Version::supports_avx512dq()); 5797 match(Set dst (MulVL src1 src2)); 5798 effect(TEMP tmp1, TEMP tmp); 5799 format %{ "vpshufd $tmp,$src2\n\t" 5800 "vpmulld $tmp,$src1,$tmp\n\t" 5801 "vphaddd $tmp,$tmp,$tmp\n\t" 5802 "vpmovzxdq $tmp,$tmp\n\t" 5803 "vpsllq $tmp,$tmp\n\t" 5804 "vpmuludq $tmp1,$src1,$src2\n\t" 5805 "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} 5806 ins_encode %{ 5807 int vlen_enc = Assembler::AVX_256bit; 5808 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); 5809 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5810 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 5811 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5812 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5813 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); 5814 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5815 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5816 %} 5817 ins_pipe( pipe_slow ); 5818 %} 5819 5820 // Floats vector mul 5821 instruct vmulF(vec dst, vec src) %{ 5822 predicate(UseAVX == 0); 5823 match(Set dst (MulVF dst src)); 5824 format %{ "mulps $dst,$src\t! mul packedF" %} 5825 ins_encode %{ 5826 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5827 %} 5828 ins_pipe( pipe_slow ); 5829 %} 5830 5831 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5832 predicate(UseAVX > 0); 5833 match(Set dst (MulVF src1 src2)); 5834 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5835 ins_encode %{ 5836 int vlen_enc = vector_length_encoding(this); 5837 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5838 %} 5839 ins_pipe( pipe_slow ); 5840 %} 5841 5842 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5843 predicate((UseAVX > 0) && 5844 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5845 match(Set dst (MulVF src (LoadVector mem))); 5846 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5847 ins_encode %{ 5848 int vlen_enc = vector_length_encoding(this); 5849 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5850 %} 5851 ins_pipe( pipe_slow ); 5852 %} 5853 5854 // Doubles vector mul 5855 instruct vmulD(vec dst, vec src) %{ 5856 predicate(UseAVX == 0); 5857 match(Set dst (MulVD dst src)); 5858 format %{ "mulpd $dst,$src\t! mul packedD" %} 5859 ins_encode %{ 5860 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5861 %} 5862 ins_pipe( pipe_slow ); 5863 %} 5864 5865 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5866 predicate(UseAVX > 0); 5867 match(Set dst (MulVD src1 src2)); 5868 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 5869 ins_encode %{ 5870 int vlen_enc = vector_length_encoding(this); 5871 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5872 %} 5873 ins_pipe( pipe_slow ); 5874 %} 5875 5876 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5877 predicate((UseAVX > 0) && 5878 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5879 match(Set dst (MulVD src (LoadVector mem))); 5880 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5881 ins_encode %{ 5882 int vlen_enc = vector_length_encoding(this); 5883 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5884 %} 5885 ins_pipe( pipe_slow ); 5886 %} 5887 5888 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5889 predicate(Matcher::vector_length(n) == 8); 5890 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 5891 effect(TEMP dst, USE src1, USE src2); 5892 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 5893 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 5894 %} 5895 ins_encode %{ 5896 assert(UseAVX > 0, "required"); 5897 5898 int vlen_enc = Assembler::AVX_256bit; 5899 int cond = (Assembler::Condition)($copnd$$cmpcode); 5900 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5901 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5902 %} 5903 ins_pipe( pipe_slow ); 5904 %} 5905 5906 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5907 predicate(Matcher::vector_length(n) == 4); 5908 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 5909 effect(TEMP dst, USE src1, USE src2); 5910 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 5911 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 5912 %} 5913 ins_encode %{ 5914 assert(UseAVX > 0, "required"); 5915 5916 int vlen_enc = Assembler::AVX_256bit; 5917 int cond = (Assembler::Condition)($copnd$$cmpcode); 5918 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5919 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5920 %} 5921 ins_pipe( pipe_slow ); 5922 %} 5923 5924 // --------------------------------- DIV -------------------------------------- 5925 5926 // Floats vector div 5927 instruct vdivF(vec dst, vec src) %{ 5928 predicate(UseAVX == 0); 5929 match(Set dst (DivVF dst src)); 5930 format %{ "divps $dst,$src\t! div packedF" %} 5931 ins_encode %{ 5932 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5933 %} 5934 ins_pipe( pipe_slow ); 5935 %} 5936 5937 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5938 predicate(UseAVX > 0); 5939 match(Set dst (DivVF src1 src2)); 5940 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5941 ins_encode %{ 5942 int vlen_enc = vector_length_encoding(this); 5943 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5944 %} 5945 ins_pipe( pipe_slow ); 5946 %} 5947 5948 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5949 predicate((UseAVX > 0) && 5950 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5951 match(Set dst (DivVF src (LoadVector mem))); 5952 format %{ "vdivps $dst,$src,$mem\t! 
div packedF" %} 5953 ins_encode %{ 5954 int vlen_enc = vector_length_encoding(this); 5955 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5956 %} 5957 ins_pipe( pipe_slow ); 5958 %} 5959 5960 // Doubles vector div 5961 instruct vdivD(vec dst, vec src) %{ 5962 predicate(UseAVX == 0); 5963 match(Set dst (DivVD dst src)); 5964 format %{ "divpd $dst,$src\t! div packedD" %} 5965 ins_encode %{ 5966 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5967 %} 5968 ins_pipe( pipe_slow ); 5969 %} 5970 5971 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5972 predicate(UseAVX > 0); 5973 match(Set dst (DivVD src1 src2)); 5974 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5975 ins_encode %{ 5976 int vlen_enc = vector_length_encoding(this); 5977 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5978 %} 5979 ins_pipe( pipe_slow ); 5980 %} 5981 5982 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5983 predicate((UseAVX > 0) && 5984 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5985 match(Set dst (DivVD src (LoadVector mem))); 5986 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5987 ins_encode %{ 5988 int vlen_enc = vector_length_encoding(this); 5989 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5990 %} 5991 ins_pipe( pipe_slow ); 5992 %} 5993 5994 // ------------------------------ MinMax --------------------------------------- 5995 5996 // Byte, Short, Int vector Min/Max 5997 instruct minmax_reg_sse(vec dst, vec src) %{ 5998 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5999 UseAVX == 0); 6000 match(Set dst (MinV dst src)); 6001 match(Set dst (MaxV dst src)); 6002 format %{ "vector_minmax $dst,$src\t! " %} 6003 ins_encode %{ 6004 assert(UseSSE >= 4, "required"); 6005 6006 int opcode = this->ideal_Opcode(); 6007 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6008 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6009 %} 6010 ins_pipe( pipe_slow ); 6011 %} 6012 6013 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6014 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6015 UseAVX > 0); 6016 match(Set dst (MinV src1 src2)); 6017 match(Set dst (MaxV src1 src2)); 6018 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 6019 ins_encode %{ 6020 int opcode = this->ideal_Opcode(); 6021 int vlen_enc = vector_length_encoding(this); 6022 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6023 6024 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6025 %} 6026 ins_pipe( pipe_slow ); 6027 %} 6028 6029 // Long vector Min/Max 6030 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6031 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6032 UseAVX == 0); 6033 match(Set dst (MinV dst src)); 6034 match(Set dst (MaxV src dst)); 6035 effect(TEMP dst, TEMP tmp); 6036 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6037 ins_encode %{ 6038 assert(UseSSE >= 4, "required"); 6039 6040 int opcode = this->ideal_Opcode(); 6041 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6042 assert(elem_bt == T_LONG, "sanity"); 6043 6044 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6045 %} 6046 ins_pipe( pipe_slow ); 6047 %} 6048 6049 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6050 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6051 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6052 match(Set dst (MinV src1 src2)); 6053 match(Set dst (MaxV src1 src2)); 6054 effect(TEMP dst); 6055 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6056 ins_encode %{ 6057 int vlen_enc = vector_length_encoding(this); 6058 int opcode = this->ideal_Opcode(); 6059 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6060 assert(elem_bt == T_LONG, "sanity"); 6061 6062 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6063 %} 6064 ins_pipe( pipe_slow ); 6065 %} 6066 6067 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6068 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6069 Matcher::vector_element_basic_type(n) == T_LONG); 6070 match(Set dst (MinV src1 src2)); 6071 match(Set dst (MaxV src1 src2)); 6072 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6073 ins_encode %{ 6074 assert(UseAVX > 2, "required"); 6075 6076 int vlen_enc = vector_length_encoding(this); 6077 int opcode = this->ideal_Opcode(); 6078 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6079 assert(elem_bt == T_LONG, "sanity"); 6080 6081 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6082 %} 6083 ins_pipe( pipe_slow ); 6084 %} 6085 6086 // Float/Double vector Min/Max 6087 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6088 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6089 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6090 UseAVX > 0); 6091 match(Set dst (MinV a b)); 6092 match(Set dst (MaxV a b)); 6093 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6094 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6095 ins_encode %{ 6096 assert(UseAVX > 0, "required"); 6097 6098 int opcode = this->ideal_Opcode(); 6099 int vlen_enc = vector_length_encoding(this); 6100 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6101 6102 __ vminmax_fp(opcode, elem_bt, 6103 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6104 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6105 %} 6106 ins_pipe( pipe_slow ); 6107 %} 6108 6109 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6110 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6111 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6112 match(Set dst (MinV a b)); 6113 match(Set dst (MaxV a b)); 6114 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6115 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6116 ins_encode %{ 6117 assert(UseAVX > 2, "required"); 6118 6119 int opcode = this->ideal_Opcode(); 6120 int vlen_enc = vector_length_encoding(this); 6121 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6122 6123 __ evminmax_fp(opcode, elem_bt, 6124 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6125 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6126 %} 6127 ins_pipe( pipe_slow ); 6128 %} 6129 6130 // --------------------------------- Signum/CopySign --------------------------- 6131 6132 instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ 6133 match(Set dst (SignumF dst (Binary zero one))); 6134 effect(TEMP scratch, KILL cr); 6135 format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %} 6136 ins_encode %{ 6137 int opcode = this->ideal_Opcode(); 6138 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 6139 %} 6140 ins_pipe( pipe_slow ); 6141 %} 6142 6143 instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{ 6144 match(Set dst (SignumD dst (Binary zero one))); 6145 effect(TEMP scratch, KILL cr); 6146 format %{ "signumD $dst, $dst\t! 
using $scratch as TEMP" %} 6147 ins_encode %{ 6148 int opcode = this->ideal_Opcode(); 6149 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 6150 %} 6151 ins_pipe( pipe_slow ); 6152 %} 6153 6154 // --------------------------------------- 6155 // For copySign use 0xE4 as writemask for vpternlog 6156 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6157 // C (xmm2) is set to 0x7FFFFFFF 6158 // Wherever xmm2 is 0, we want to pick from B (sign) 6159 // Wherever xmm2 is 1, we want to pick from A (src) 6160 // 6161 // A B C Result 6162 // 0 0 0 0 6163 // 0 0 1 0 6164 // 0 1 0 1 6165 // 0 1 1 0 6166 // 1 0 0 0 6167 // 1 0 1 1 6168 // 1 1 0 1 6169 // 1 1 1 1 6170 // 6171 // Result going from high bit to low bit is 0x11100100 = 0xe4 6172 // --------------------------------------- 6173 6174 #ifdef _LP64 6175 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6176 match(Set dst (CopySignF dst src)); 6177 effect(TEMP tmp1, TEMP tmp2); 6178 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6179 ins_encode %{ 6180 __ movl($tmp2$$Register, 0x7FFFFFFF); 6181 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6182 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6183 %} 6184 ins_pipe( pipe_slow ); 6185 %} 6186 6187 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6188 match(Set dst (CopySignD dst (Binary src zero))); 6189 ins_cost(100); 6190 effect(TEMP tmp1, TEMP tmp2); 6191 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6192 ins_encode %{ 6193 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6194 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6195 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6196 %} 6197 ins_pipe( pipe_slow ); 6198 %} 6199 #endif // _LP64 6200 6201 // --------------------------------- Sqrt -------------------------------------- 6202 6203 instruct vsqrtF_reg(vec dst, vec src) %{ 6204 match(Set dst (SqrtVF src)); 6205 ins_cost(400); 6206 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6207 ins_encode %{ 6208 assert(UseAVX > 0, "required"); 6209 int vlen_enc = vector_length_encoding(this); 6210 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6211 %} 6212 ins_pipe( pipe_slow ); 6213 %} 6214 6215 instruct vsqrtF_mem(vec dst, memory mem) %{ 6216 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6217 match(Set dst (SqrtVF (LoadVector mem))); 6218 ins_cost(400); 6219 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6220 ins_encode %{ 6221 assert(UseAVX > 0, "required"); 6222 int vlen_enc = vector_length_encoding(this); 6223 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6224 %} 6225 ins_pipe( pipe_slow ); 6226 %} 6227 6228 // Floating point vector sqrt 6229 instruct vsqrtD_reg(vec dst, vec src) %{ 6230 match(Set dst (SqrtVD src)); 6231 ins_cost(400); 6232 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6233 ins_encode %{ 6234 assert(UseAVX > 0, "required"); 6235 int vlen_enc = vector_length_encoding(this); 6236 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6237 %} 6238 ins_pipe( pipe_slow ); 6239 %} 6240 6241 instruct vsqrtD_mem(vec dst, memory mem) %{ 6242 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6243 match(Set dst (SqrtVD (LoadVector mem))); 6244 ins_cost(400); 6245 format %{ "vsqrtpd $dst,$mem\t! 
sqrt packedD" %} 6246 ins_encode %{ 6247 assert(UseAVX > 0, "required"); 6248 int vlen_enc = vector_length_encoding(this); 6249 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6250 %} 6251 ins_pipe( pipe_slow ); 6252 %} 6253 6254 // ------------------------------ Shift --------------------------------------- 6255 6256 // Left and right shift count vectors are the same on x86 6257 // (only lowest bits of xmm reg are used for count). 6258 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6259 match(Set dst (LShiftCntV cnt)); 6260 match(Set dst (RShiftCntV cnt)); 6261 format %{ "movdl $dst,$cnt\t! load shift count" %} 6262 ins_encode %{ 6263 __ movdl($dst$$XMMRegister, $cnt$$Register); 6264 %} 6265 ins_pipe( pipe_slow ); 6266 %} 6267 6268 // Byte vector shift 6269 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6270 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6271 match(Set dst ( LShiftVB src shift)); 6272 match(Set dst ( RShiftVB src shift)); 6273 match(Set dst (URShiftVB src shift)); 6274 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 6275 format %{"vector_byte_shift $dst,$src,$shift" %} 6276 ins_encode %{ 6277 assert(UseSSE > 3, "required"); 6278 int opcode = this->ideal_Opcode(); 6279 bool sign = (opcode != Op_URShiftVB); 6280 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6281 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6282 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6283 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6284 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6285 %} 6286 ins_pipe( pipe_slow ); 6287 %} 6288 6289 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6290 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6291 UseAVX <= 1); 6292 match(Set dst ( LShiftVB src shift)); 6293 match(Set dst ( RShiftVB src shift)); 6294 match(Set dst (URShiftVB src shift)); 6295 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 6296 format %{"vector_byte_shift $dst,$src,$shift" %} 6297 ins_encode %{ 6298 assert(UseSSE > 3, "required"); 6299 int opcode = this->ideal_Opcode(); 6300 bool sign = (opcode != Op_URShiftVB); 6301 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6302 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6303 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6304 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6305 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6306 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6307 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6308 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6309 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6310 %} 6311 ins_pipe( pipe_slow ); 6312 %} 6313 6314 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6315 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6316 UseAVX > 1); 6317 match(Set dst ( LShiftVB src shift)); 6318 match(Set dst ( RShiftVB src shift)); 6319 match(Set dst (URShiftVB src shift)); 6320 effect(TEMP dst, TEMP tmp, TEMP scratch); 6321 format %{"vector_byte_shift $dst,$src,$shift" %} 6322 ins_encode %{ 6323 int opcode = this->ideal_Opcode(); 6324 bool sign = (opcode != Op_URShiftVB); 6325 int vlen_enc = Assembler::AVX_256bit; 6326 __ vextendbw(sign, $tmp$$XMMRegister, 
                   $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
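// Note: SSE/AVX provide no packed byte shift instruction, which is why the
// vector_byte_shift patterns above widen bytes to words (vextendbw), shift
// the widened lanes (vshiftw), mask each result back into byte range (pand
// with vector_short_to_byte_mask) and re-pack them (packuswb). As an
// illustration only -- a scalar C++ sketch of the per-lane idea, with a
// hypothetical helper name, not code emitted by the JIT:
//
//   static inline unsigned char byte_shift_left(unsigned char b, int s) {
//     unsigned short w = b;              // vextendbw: widen byte to word
//     w = (unsigned short)(w << s);      // vshiftw:   shift the widened lane
//     w &= 0x00FF;                       // pand:      keep only the low byte
//     return (unsigned char)w;           // packuswb:  narrow back to a byte
//   }
//
// Signed (RShiftVB) and unsigned (URShiftVB) right shifts follow the same
// scheme, differing only in whether the widening step sign- or zero-extends.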

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java converts short values to int with
// sign extension before the shift. Char vectors are fine, since chars
// are unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t!
shift packedI" %} 6457 ins_encode %{ 6458 int opcode = this->ideal_Opcode(); 6459 if (UseAVX > 0) { 6460 int vector_len = vector_length_encoding(this); 6461 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6462 } else { 6463 int vlen = Matcher::vector_length(this); 6464 if (vlen == 2) { 6465 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6466 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6467 } else { 6468 assert(vlen == 4, "sanity"); 6469 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6470 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6471 } 6472 } 6473 %} 6474 ins_pipe( pipe_slow ); 6475 %} 6476 6477 // Longs vector shift 6478 instruct vshiftL(vec dst, vec src, vec shift) %{ 6479 predicate(!n->as_ShiftV()->is_var_shift()); 6480 match(Set dst ( LShiftVL src shift)); 6481 match(Set dst (URShiftVL src shift)); 6482 effect(TEMP dst, USE src, USE shift); 6483 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6484 ins_encode %{ 6485 int opcode = this->ideal_Opcode(); 6486 if (UseAVX > 0) { 6487 int vlen_enc = vector_length_encoding(this); 6488 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6489 } else { 6490 assert(Matcher::vector_length(this) == 2, ""); 6491 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6492 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6493 } 6494 %} 6495 ins_pipe( pipe_slow ); 6496 %} 6497 6498 // Longs vector constant shift 6499 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6500 match(Set dst (LShiftVL src (LShiftCntV shift))); 6501 match(Set dst (URShiftVL src (RShiftCntV shift))); 6502 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6503 ins_encode %{ 6504 int opcode = this->ideal_Opcode(); 6505 if (UseAVX > 0) { 6506 int vector_len = vector_length_encoding(this); 6507 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6508 } else { 6509 assert(Matcher::vector_length(this) == 2, ""); 6510 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6511 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6512 } 6513 %} 6514 ins_pipe( pipe_slow ); 6515 %} 6516 6517 // -------------------ArithmeticRightShift ----------------------------------- 6518 // Long vector arithmetic right shift 6519 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6520 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6521 match(Set dst (RShiftVL src shift)); 6522 effect(TEMP dst, TEMP tmp, TEMP scratch); 6523 format %{ "vshiftq $dst,$src,$shift" %} 6524 ins_encode %{ 6525 uint vlen = Matcher::vector_length(this); 6526 if (vlen == 2) { 6527 assert(UseSSE >= 2, "required"); 6528 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6529 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6530 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6531 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6532 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6533 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6534 } else { 6535 assert(vlen == 4, "sanity"); 6536 assert(UseAVX > 1, "required"); 6537 int vlen_enc = Assembler::AVX_256bit; 6538 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6539 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6540 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6541 __ vpxor($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6542 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6543 } 6544 %} 6545 ins_pipe( pipe_slow ); 6546 %} 6547 6548 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6549 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6550 match(Set dst (RShiftVL src shift)); 6551 format %{ "vshiftq $dst,$src,$shift" %} 6552 ins_encode %{ 6553 int vlen_enc = vector_length_encoding(this); 6554 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6555 %} 6556 ins_pipe( pipe_slow ); 6557 %} 6558 6559 // ------------------- Variable Shift ----------------------------- 6560 // Byte variable shift 6561 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6562 predicate(Matcher::vector_length(n) <= 8 && 6563 n->as_ShiftV()->is_var_shift() && 6564 !VM_Version::supports_avx512bw()); 6565 match(Set dst ( LShiftVB src shift)); 6566 match(Set dst ( RShiftVB src shift)); 6567 match(Set dst (URShiftVB src shift)); 6568 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6569 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} 6570 ins_encode %{ 6571 assert(UseAVX >= 2, "required"); 6572 6573 int opcode = this->ideal_Opcode(); 6574 int vlen_enc = Assembler::AVX_128bit; 6575 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6576 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6577 %} 6578 ins_pipe( pipe_slow ); 6579 %} 6580 6581 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6582 predicate(Matcher::vector_length(n) == 16 && 6583 n->as_ShiftV()->is_var_shift() && 6584 !VM_Version::supports_avx512bw()); 6585 match(Set dst ( LShiftVB src shift)); 6586 match(Set dst ( RShiftVB src shift)); 6587 match(Set dst (URShiftVB src shift)); 6588 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6589 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6590 ins_encode %{ 6591 assert(UseAVX >= 2, "required"); 6592 6593 int opcode = this->ideal_Opcode(); 6594 int vlen_enc = Assembler::AVX_128bit; 6595 // Shift lower half and get word result in dst 6596 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6597 6598 // Shift upper half and get word result in vtmp1 6599 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6600 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6601 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6602 6603 // Merge and down convert the two word results to byte in dst 6604 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6605 %} 6606 ins_pipe( pipe_slow ); 6607 %} 6608 6609 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ 6610 predicate(Matcher::vector_length(n) == 32 && 6611 n->as_ShiftV()->is_var_shift() && 6612 !VM_Version::supports_avx512bw()); 6613 match(Set dst ( LShiftVB src shift)); 6614 match(Set dst ( RShiftVB src shift)); 6615 match(Set dst (URShiftVB src shift)); 6616 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); 6617 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} 6618 ins_encode %{ 6619 assert(UseAVX >= 2, "required"); 6620 6621 int opcode = this->ideal_Opcode(); 6622 int vlen_enc = Assembler::AVX_128bit; 6623 // Process lower 128 bits and get result in dst 6624 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6625 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6626 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6627 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6628 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6629 6630 // Process higher 128 bits and get result in vtmp3 6631 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6632 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6633 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); 6634 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6635 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6636 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6637 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6638 6639 // Merge the two results in dst 6640 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6641 %} 6642 ins_pipe( pipe_slow ); 6643 %} 6644 6645 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6646 predicate(Matcher::vector_length(n) <= 32 && 6647 n->as_ShiftV()->is_var_shift() && 6648 VM_Version::supports_avx512bw()); 6649 match(Set dst ( LShiftVB src shift)); 6650 match(Set dst ( RShiftVB src shift)); 6651 match(Set dst (URShiftVB src shift)); 6652 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6653 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp, $scratch as TEMP" %} 6654 ins_encode %{ 6655 assert(UseAVX > 2, "required"); 6656 6657 int opcode = this->ideal_Opcode(); 6658 int vlen_enc = vector_length_encoding(this); 6659 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6660 %} 6661 ins_pipe( pipe_slow ); 6662 %} 6663 6664 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6665 predicate(Matcher::vector_length(n) == 64 && 6666 n->as_ShiftV()->is_var_shift() && 6667 VM_Version::supports_avx512bw()); 6668 match(Set dst ( LShiftVB src shift)); 6669 match(Set dst ( RShiftVB src shift)); 6670 match(Set dst (URShiftVB src shift)); 6671 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6672 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6673 ins_encode %{ 6674 assert(UseAVX > 2, "required"); 6675 6676 int opcode = this->ideal_Opcode(); 6677 int vlen_enc = Assembler::AVX_256bit; 6678 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6679 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6680 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6681 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6682 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6683 %} 6684 ins_pipe( pipe_slow ); 6685 %} 6686 6687 // Short variable shift 6688 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6689 predicate(Matcher::vector_length(n) <= 8 && 6690 n->as_ShiftV()->is_var_shift() && 6691 !VM_Version::supports_avx512bw()); 6692 match(Set dst ( LShiftVS src shift)); 6693 match(Set dst ( RShiftVS src shift)); 6694 match(Set dst (URShiftVS src shift)); 6695 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6696 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6697 ins_encode %{ 6698 assert(UseAVX >= 2, "required"); 6699 6700 int opcode = this->ideal_Opcode(); 6701 bool sign = (opcode != Op_URShiftVS); 6702 int vlen_enc = Assembler::AVX_256bit; 6703 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6704 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6705 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6706 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6707 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6708 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6709 %} 6710 ins_pipe( pipe_slow ); 6711 %} 6712 6713 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6714 predicate(Matcher::vector_length(n) == 16 && 6715 n->as_ShiftV()->is_var_shift() && 6716 !VM_Version::supports_avx512bw()); 6717 match(Set dst ( LShiftVS src shift)); 6718 match(Set dst ( RShiftVS src shift)); 6719 match(Set dst (URShiftVS src shift)); 6720 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6721 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6722 ins_encode %{ 6723 assert(UseAVX >= 2, "required"); 6724 6725 int opcode = this->ideal_Opcode(); 6726 bool sign = (opcode != Op_URShiftVS); 6727 int vlen_enc = Assembler::AVX_256bit; 6728 // Shift lower half, with result in vtmp2 
using vtmp1 as TEMP 6729 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6730 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6731 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6732 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6733 6734 // Shift upper half, with result in dst using vtmp1 as TEMP 6735 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6736 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6737 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6738 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6739 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6740 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6741 6742 // Merge lower and upper half result into dst 6743 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6744 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6745 %} 6746 ins_pipe( pipe_slow ); 6747 %} 6748 6749 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6750 predicate(n->as_ShiftV()->is_var_shift() && 6751 VM_Version::supports_avx512bw()); 6752 match(Set dst ( LShiftVS src shift)); 6753 match(Set dst ( RShiftVS src shift)); 6754 match(Set dst (URShiftVS src shift)); 6755 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6756 ins_encode %{ 6757 assert(UseAVX > 2, "required"); 6758 6759 int opcode = this->ideal_Opcode(); 6760 int vlen_enc = vector_length_encoding(this); 6761 if (!VM_Version::supports_avx512vl()) { 6762 vlen_enc = Assembler::AVX_512bit; 6763 } 6764 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6765 %} 6766 ins_pipe( pipe_slow ); 6767 %} 6768 6769 //Integer variable shift 6770 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6771 predicate(n->as_ShiftV()->is_var_shift()); 6772 match(Set dst ( LShiftVI src shift)); 6773 match(Set dst ( RShiftVI src shift)); 6774 match(Set dst (URShiftVI src shift)); 6775 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6776 ins_encode %{ 6777 assert(UseAVX >= 2, "required"); 6778 6779 int opcode = this->ideal_Opcode(); 6780 int vlen_enc = vector_length_encoding(this); 6781 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6782 %} 6783 ins_pipe( pipe_slow ); 6784 %} 6785 6786 //Long variable shift 6787 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6788 predicate(n->as_ShiftV()->is_var_shift()); 6789 match(Set dst ( LShiftVL src shift)); 6790 match(Set dst (URShiftVL src shift)); 6791 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 6792 ins_encode %{ 6793 assert(UseAVX >= 2, "required"); 6794 6795 int opcode = this->ideal_Opcode(); 6796 int vlen_enc = vector_length_encoding(this); 6797 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6798 %} 6799 ins_pipe( pipe_slow ); 6800 %} 6801 6802 //Long variable right shift arithmetic 6803 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6804 predicate(Matcher::vector_length(n) <= 4 && 6805 n->as_ShiftV()->is_var_shift() && 6806 UseAVX == 2); 6807 match(Set dst (RShiftVL src shift)); 6808 effect(TEMP dst, TEMP vtmp); 6809 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 6810 ins_encode %{ 6811 int opcode = this->ideal_Opcode(); 6812 int vlen_enc = vector_length_encoding(this); 6813 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6814 $vtmp$$XMMRegister); 6815 %} 6816 ins_pipe( pipe_slow ); 6817 %} 6818 6819 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6820 predicate(n->as_ShiftV()->is_var_shift() && 6821 UseAVX > 2); 6822 match(Set dst (RShiftVL src shift)); 6823 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6824 ins_encode %{ 6825 int opcode = this->ideal_Opcode(); 6826 int vlen_enc = vector_length_encoding(this); 6827 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6828 %} 6829 ins_pipe( pipe_slow ); 6830 %} 6831 6832 // --------------------------------- AND -------------------------------------- 6833 6834 instruct vand(vec dst, vec src) %{ 6835 predicate(UseAVX == 0); 6836 match(Set dst (AndV dst src)); 6837 format %{ "pand $dst,$src\t! and vectors" %} 6838 ins_encode %{ 6839 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6840 %} 6841 ins_pipe( pipe_slow ); 6842 %} 6843 6844 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6845 predicate(UseAVX > 0); 6846 match(Set dst (AndV src1 src2)); 6847 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6848 ins_encode %{ 6849 int vlen_enc = vector_length_encoding(this); 6850 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6851 %} 6852 ins_pipe( pipe_slow ); 6853 %} 6854 6855 instruct vand_mem(vec dst, vec src, memory mem) %{ 6856 predicate((UseAVX > 0) && 6857 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6858 match(Set dst (AndV src (LoadVector mem))); 6859 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6860 ins_encode %{ 6861 int vlen_enc = vector_length_encoding(this); 6862 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6863 %} 6864 ins_pipe( pipe_slow ); 6865 %} 6866 6867 // --------------------------------- OR --------------------------------------- 6868 6869 instruct vor(vec dst, vec src) %{ 6870 predicate(UseAVX == 0); 6871 match(Set dst (OrV dst src)); 6872 format %{ "por $dst,$src\t! or vectors" %} 6873 ins_encode %{ 6874 __ por($dst$$XMMRegister, $src$$XMMRegister); 6875 %} 6876 ins_pipe( pipe_slow ); 6877 %} 6878 6879 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6880 predicate(UseAVX > 0); 6881 match(Set dst (OrV src1 src2)); 6882 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6883 ins_encode %{ 6884 int vlen_enc = vector_length_encoding(this); 6885 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6886 %} 6887 ins_pipe( pipe_slow ); 6888 %} 6889 6890 instruct vor_mem(vec dst, vec src, memory mem) %{ 6891 predicate((UseAVX > 0) && 6892 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6893 match(Set dst (OrV src (LoadVector mem))); 6894 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6895 ins_encode %{ 6896 int vlen_enc = vector_length_encoding(this); 6897 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6898 %} 6899 ins_pipe( pipe_slow ); 6900 %} 6901 6902 // --------------------------------- XOR -------------------------------------- 6903 6904 instruct vxor(vec dst, vec src) %{ 6905 predicate(UseAVX == 0); 6906 match(Set dst (XorV dst src)); 6907 format %{ "pxor $dst,$src\t! 
xor vectors" %} 6908 ins_encode %{ 6909 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6910 %} 6911 ins_pipe( pipe_slow ); 6912 %} 6913 6914 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6915 predicate(UseAVX > 0); 6916 match(Set dst (XorV src1 src2)); 6917 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6918 ins_encode %{ 6919 int vlen_enc = vector_length_encoding(this); 6920 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6921 %} 6922 ins_pipe( pipe_slow ); 6923 %} 6924 6925 instruct vxor_mem(vec dst, vec src, memory mem) %{ 6926 predicate((UseAVX > 0) && 6927 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6928 match(Set dst (XorV src (LoadVector mem))); 6929 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 6930 ins_encode %{ 6931 int vlen_enc = vector_length_encoding(this); 6932 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6933 %} 6934 ins_pipe( pipe_slow ); 6935 %} 6936 6937 // --------------------------------- VectorCast -------------------------------------- 6938 6939 instruct vcastBtoX(vec dst, vec src) %{ 6940 match(Set dst (VectorCastB2X src)); 6941 format %{ "vector_cast_b2x $dst,$src\t!" %} 6942 ins_encode %{ 6943 assert(UseAVX > 0, "required"); 6944 6945 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6946 int vlen_enc = vector_length_encoding(this); 6947 switch (to_elem_bt) { 6948 case T_SHORT: 6949 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6950 break; 6951 case T_INT: 6952 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6953 break; 6954 case T_FLOAT: 6955 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6956 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6957 break; 6958 case T_LONG: 6959 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6960 break; 6961 case T_DOUBLE: { 6962 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 6963 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 6964 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6965 break; 6966 } 6967 default: assert(false, "%s", type2name(to_elem_bt)); 6968 } 6969 %} 6970 ins_pipe( pipe_slow ); 6971 %} 6972 6973 instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6974 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6975 Matcher::vector_length(n->in(1)) <= 8 && // src 6976 Matcher::vector_element_basic_type(n) == T_BYTE); 6977 effect(TEMP scratch); 6978 match(Set dst (VectorCastS2X src)); 6979 format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} 6980 ins_encode %{ 6981 assert(UseAVX > 0, "required"); 6982 6983 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6984 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6985 %} 6986 ins_pipe( pipe_slow ); 6987 %} 6988 6989 instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6990 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6991 Matcher::vector_length(n->in(1)) == 16 && // src 6992 Matcher::vector_element_basic_type(n) == T_BYTE); 6993 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6994 match(Set dst (VectorCastS2X src)); 6995 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp, $scratch as TEMP" %} 6996 ins_encode %{ 6997 assert(UseAVX > 0, "required"); 6998 6999 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7000 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 7001 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7002 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vcastStoX_evex(vec dst, vec src) %{ 7008 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7009 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7010 match(Set dst (VectorCastS2X src)); 7011 format %{ "vector_cast_s2x $dst,$src\t!" %} 7012 ins_encode %{ 7013 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7014 int src_vlen_enc = vector_length_encoding(this, $src); 7015 int vlen_enc = vector_length_encoding(this); 7016 switch (to_elem_bt) { 7017 case T_BYTE: 7018 if (!VM_Version::supports_avx512vl()) { 7019 vlen_enc = Assembler::AVX_512bit; 7020 } 7021 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7022 break; 7023 case T_INT: 7024 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7025 break; 7026 case T_FLOAT: 7027 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7028 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7029 break; 7030 case T_LONG: 7031 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7032 break; 7033 case T_DOUBLE: { 7034 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7035 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7036 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7037 break; 7038 } 7039 default: 7040 ShouldNotReachHere(); 7041 } 7042 %} 7043 ins_pipe( pipe_slow ); 7044 %} 7045 7046 instruct castItoX(vec dst, vec src, rRegP scratch) %{ 7047 predicate(UseAVX <= 2 && 7048 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7049 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7050 match(Set dst (VectorCastI2X src)); 7051 format %{ "vector_cast_i2x $dst,$src\t! 
using $scratch as TEMP" %} 7052 effect(TEMP scratch); 7053 ins_encode %{ 7054 assert(UseAVX > 0, "required"); 7055 7056 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7057 int vlen_enc = vector_length_encoding(this, $src); 7058 7059 if (to_elem_bt == T_BYTE) { 7060 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 7061 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7062 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7063 } else { 7064 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7065 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7066 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7067 } 7068 %} 7069 ins_pipe( pipe_slow ); 7070 %} 7071 7072 instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 7073 predicate(UseAVX <= 2 && 7074 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7075 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7076 match(Set dst (VectorCastI2X src)); 7077 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} 7078 effect(TEMP dst, TEMP vtmp, TEMP scratch); 7079 ins_encode %{ 7080 assert(UseAVX > 0, "required"); 7081 7082 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7083 int vlen_enc = vector_length_encoding(this, $src); 7084 7085 if (to_elem_bt == T_BYTE) { 7086 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 7087 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7088 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7089 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7090 } else { 7091 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7092 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7093 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7094 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7095 } 7096 %} 7097 ins_pipe( pipe_slow ); 7098 %} 7099 7100 instruct vcastItoX_evex(vec dst, vec src) %{ 7101 predicate(UseAVX > 2 || 7102 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7103 match(Set dst (VectorCastI2X src)); 7104 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7105 ins_encode %{ 7106 assert(UseAVX > 0, "required"); 7107 7108 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7109 int src_vlen_enc = vector_length_encoding(this, $src); 7110 int dst_vlen_enc = vector_length_encoding(this); 7111 switch (dst_elem_bt) { 7112 case T_BYTE: 7113 if (!VM_Version::supports_avx512vl()) { 7114 src_vlen_enc = Assembler::AVX_512bit; 7115 } 7116 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7117 break; 7118 case T_SHORT: 7119 if (!VM_Version::supports_avx512vl()) { 7120 src_vlen_enc = Assembler::AVX_512bit; 7121 } 7122 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7123 break; 7124 case T_FLOAT: 7125 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7126 break; 7127 case T_LONG: 7128 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7129 break; 7130 case T_DOUBLE: 7131 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7132 break; 7133 default: 7134 ShouldNotReachHere(); 7135 } 7136 %} 7137 ins_pipe( pipe_slow ); 7138 %} 7139 7140 instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 7141 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7142 UseAVX <= 2); 7143 match(Set dst (VectorCastL2X src)); 7144 effect(TEMP scratch); 7145 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 7146 ins_encode %{ 7147 assert(UseAVX > 0, "required"); 7148 7149 int vlen = Matcher::vector_length_in_bytes(this, $src); 7150 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7151 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7152 : ExternalAddress(vector_int_to_short_mask()); 7153 if (vlen <= 16) { 7154 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7155 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7156 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7157 } else { 7158 assert(vlen <= 32, "required"); 7159 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7160 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7161 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7162 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7163 } 7164 if (to_elem_bt == T_BYTE) { 7165 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7166 } 7167 %} 7168 ins_pipe( pipe_slow ); 7169 %} 7170 7171 instruct vcastLtoX_evex(vec dst, vec src) %{ 7172 predicate(UseAVX > 2 || 7173 (Matcher::vector_element_basic_type(n) == T_INT || 7174 Matcher::vector_element_basic_type(n) == T_FLOAT || 7175 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7176 match(Set dst (VectorCastL2X src)); 7177 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7178 ins_encode %{ 7179 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7180 int vlen = Matcher::vector_length_in_bytes(this, $src); 7181 int vlen_enc = vector_length_encoding(this, $src); 7182 switch (to_elem_bt) { 7183 case T_BYTE: 7184 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7185 vlen_enc = Assembler::AVX_512bit; 7186 } 7187 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7188 break; 7189 case T_SHORT: 7190 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7191 vlen_enc = Assembler::AVX_512bit; 7192 } 7193 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7194 break; 7195 case T_INT: 7196 if (vlen == 8) { 7197 if ($dst$$XMMRegister != $src$$XMMRegister) { 7198 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7199 } 7200 } else if (vlen == 16) { 7201 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7202 } else if (vlen == 32) { 7203 if (UseAVX > 2) { 7204 if (!VM_Version::supports_avx512vl()) { 7205 vlen_enc = Assembler::AVX_512bit; 7206 } 7207 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7208 } else { 7209 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7210 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7211 } 7212 } else { // vlen == 64 7213 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7214 } 7215 break; 7216 case T_FLOAT: 7217 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7218 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7219 break; 7220 case T_DOUBLE: 7221 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7222 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7223 break; 7224 7225 default: assert(false, "%s", type2name(to_elem_bt)); 7226 } 7227 %} 7228 ins_pipe( pipe_slow ); 7229 %} 7230 7231 instruct vcastFtoD_reg(vec dst, vec src) %{ 7232 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7233 match(Set dst (VectorCastF2X src)); 7234 format %{ "vector_cast_f2d $dst,$src\t!" %} 7235 ins_encode %{ 7236 int vlen_enc = vector_length_encoding(this); 7237 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7238 %} 7239 ins_pipe( pipe_slow ); 7240 %} 7241 7242 7243 instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ 7244 predicate(!VM_Version::supports_avx512vl() && 7245 Matcher::vector_length_in_bytes(n) < 64 && 7246 Matcher::vector_element_basic_type(n) == T_INT); 7247 match(Set dst (VectorCastF2X src)); 7248 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); 7249 format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} 7250 ins_encode %{ 7251 int vlen_enc = vector_length_encoding(this); 7252 __ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7253 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7254 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7255 %} 7256 ins_pipe( pipe_slow ); 7257 %} 7258 7259 instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7260 predicate((VM_Version::supports_avx512vl() || 7261 Matcher::vector_length_in_bytes(n) == 64) && 7262 Matcher::vector_element_basic_type(n) == T_INT); 7263 match(Set dst (VectorCastF2X src)); 7264 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7265 format %{ "vector_cast_f2i $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7266 ins_encode %{ 7267 int vlen_enc = vector_length_encoding(this); 7268 __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7269 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7270 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7271 %} 7272 ins_pipe( pipe_slow ); 7273 %} 7274 7275 instruct vcastDtoF_reg(vec dst, vec src) %{ 7276 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7277 match(Set dst (VectorCastD2X src)); 7278 format %{ "vector_cast_d2x $dst,$src\t!" %} 7279 ins_encode %{ 7280 int vlen_enc = vector_length_encoding(this, $src); 7281 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7282 %} 7283 ins_pipe( pipe_slow ); 7284 %} 7285 7286 instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7287 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7288 match(Set dst (VectorCastD2X src)); 7289 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7290 format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7291 ins_encode %{ 7292 int vlen_enc = vector_length_encoding(this); 7293 __ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7294 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7295 ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc); 7296 %} 7297 ins_pipe( pipe_slow ); 7298 %} 7299 7300 instruct vucast(vec dst, vec src) %{ 7301 match(Set dst (VectorUCastB2X src)); 7302 match(Set dst (VectorUCastS2X src)); 7303 match(Set dst (VectorUCastI2X src)); 7304 format %{ "vector_ucast $dst,$src\t!" %} 7305 ins_encode %{ 7306 assert(UseAVX > 0, "required"); 7307 7308 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7309 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7310 int vlen_enc = vector_length_encoding(this); 7311 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7312 %} 7313 ins_pipe( pipe_slow ); 7314 %} 7315 7316 #ifdef _LP64 7317 instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ 7318 predicate(!VM_Version::supports_avx512vl() && 7319 Matcher::vector_length_in_bytes(n) < 64 && 7320 Matcher::vector_element_basic_type(n) == T_INT); 7321 match(Set dst (RoundVF src)); 7322 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); 7323 format %{ "vector_round_float $dst,$src\t! 
using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} 7324 ins_encode %{ 7325 int vlen_enc = vector_length_encoding(this); 7326 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7327 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7328 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7329 ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7330 %} 7331 ins_pipe( pipe_slow ); 7332 %} 7333 7334 instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7335 predicate((VM_Version::supports_avx512vl() || 7336 Matcher::vector_length_in_bytes(n) == 64) && 7337 Matcher::vector_element_basic_type(n) == T_INT); 7338 match(Set dst (RoundVF src)); 7339 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7340 format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7341 ins_encode %{ 7342 int vlen_enc = vector_length_encoding(this); 7343 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7344 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7345 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7346 ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7347 %} 7348 ins_pipe( pipe_slow ); 7349 %} 7350 7351 instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7352 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7353 match(Set dst (RoundVD src)); 7354 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7355 format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7356 ins_encode %{ 7357 int vlen_enc = vector_length_encoding(this); 7358 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7359 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7360 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7361 ExternalAddress(vector_double_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7362 %} 7363 ins_pipe( pipe_slow ); 7364 %} 7365 #endif 7366 // --------------------------------- VectorMaskCmp -------------------------------------- 7367 7368 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7369 predicate(n->bottom_type()->isa_vectmask() == NULL && 7370 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7371 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7372 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7373 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7374 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7375 ins_encode %{ 7376 int vlen_enc = vector_length_encoding(this, $src1); 7377 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7378 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7379 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7380 } else { 7381 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7382 } 7383 %} 7384 ins_pipe( pipe_slow ); 7385 %} 7386 7387 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7388 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7389 n->bottom_type()->isa_vectmask() == NULL && 7390 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7391 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7392 effect(TEMP scratch, TEMP ktmp); 7393 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7394 ins_encode %{ 7395 int vlen_enc = Assembler::AVX_512bit; 7396 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7397 KRegister mask = k0; // The comparison itself is not being masked. 7398 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7399 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7400 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7401 } else { 7402 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7403 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7404 } 7405 %} 7406 ins_pipe( pipe_slow ); 7407 %} 7408 7409 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7410 predicate(n->bottom_type()->isa_vectmask() && 7411 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7412 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7413 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7414 ins_encode %{ 7415 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7416 int vlen_enc = vector_length_encoding(this, $src1); 7417 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7418 KRegister mask = k0; // The comparison itself is not being masked. 7419 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7420 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7421 } else { 7422 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7423 } 7424 %} 7425 ins_pipe( pipe_slow ); 7426 %} 7427 7428 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7429 predicate(n->bottom_type()->isa_vectmask() == NULL && 7430 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7431 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7432 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7433 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7434 (n->in(2)->get_int() == BoolTest::eq || 7435 n->in(2)->get_int() == BoolTest::lt || 7436 n->in(2)->get_int() == BoolTest::gt)); // cond 7437 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7438 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t!
using $xtmp as TEMP" %} 7478 ins_encode %{ 7479 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7480 int vlen_enc = vector_length_encoding(this, $src1); 7481 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7482 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7483 7484 if (vlen_enc == Assembler::AVX_128bit) { 7485 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7486 } else { 7487 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7488 } 7489 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7490 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7491 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7492 %} 7493 ins_pipe( pipe_slow ); 7494 %} 7495 7496 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7497 predicate((n->bottom_type()->isa_vectmask() == NULL && 7498 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7499 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7500 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7501 effect(TEMP scratch, TEMP ktmp); 7502 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7503 ins_encode %{ 7504 assert(UseAVX > 2, "required"); 7505 7506 int vlen_enc = vector_length_encoding(this, $src1); 7507 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7508 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 7509 KRegister mask = k0; // The comparison itself is not being masked. 7510 bool merge = false; 7511 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7512 7513 switch (src1_elem_bt) { 7514 case T_INT: { 7515 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7516 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7517 break; 7518 } 7519 case T_LONG: { 7520 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7521 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7522 break; 7523 } 7524 default: assert(false, "%s", type2name(src1_elem_bt)); 7525 } 7526 %} 7527 ins_pipe( pipe_slow ); 7528 %} 7529 7530 7531 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7532 predicate(n->bottom_type()->isa_vectmask() && 7533 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7534 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7535 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Compare src1 and src2 according to the element basic type, producing a mask register.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 7628 ins_encode %{ 7629 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7630 7631 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7632 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7633 %} 7634 ins_pipe( pipe_slow ); 7635 %} 7636 #endif 7637 7638 instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7639 predicate(Matcher::vector_length(n->in(1)) <= 4); 7640 match(Set dst (ExtractF src idx)); 7641 effect(TEMP dst, TEMP tmp, TEMP vtmp); 7642 format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7643 ins_encode %{ 7644 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7645 7646 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 7647 %} 7648 ins_pipe( pipe_slow ); 7649 %} 7650 7651 instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7652 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7653 Matcher::vector_length(n->in(1)/*src*/) == 16); 7654 match(Set dst (ExtractF src idx)); 7655 effect(TEMP tmp, TEMP vtmp); 7656 format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7657 ins_encode %{ 7658 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7659 7660 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7661 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 7662 %} 7663 ins_pipe( pipe_slow ); 7664 %} 7665 7666 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7667 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7668 match(Set dst (ExtractD src idx)); 7669 format %{ "extractD $dst,$src,$idx\t!" %} 7670 ins_encode %{ 7671 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7672 7673 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7674 %} 7675 ins_pipe( pipe_slow ); 7676 %} 7677 7678 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7679 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7680 Matcher::vector_length(n->in(1)) == 8); // src 7681 match(Set dst (ExtractD src idx)); 7682 effect(TEMP vtmp); 7683 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7684 ins_encode %{ 7685 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7686 7687 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7688 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7689 %} 7690 ins_pipe( pipe_slow ); 7691 %} 7692 7693 // --------------------------------- Vector Blend -------------------------------------- 7694 7695 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7696 predicate(UseAVX == 0); 7697 match(Set dst (VectorBlend (Binary dst src) mask)); 7698 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7699 effect(TEMP tmp); 7700 ins_encode %{ 7701 assert(UseSSE >= 4, "required"); 7702 7703 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7704 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7705 } 7706 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7707 %} 7708 ins_pipe( pipe_slow ); 7709 %} 7710 7711 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7712 predicate(UseAVX > 0 && 7713 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7714 Matcher::vector_length_in_bytes(n) <= 32 && 7715 is_integral_type(Matcher::vector_element_basic_type(n))); 7716 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7717 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7718 ins_encode %{ 7719 int vlen_enc = vector_length_encoding(this); 7720 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7721 %} 7722 ins_pipe( pipe_slow ); 7723 %} 7724 7725 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7726 predicate(UseAVX > 0 && 7727 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7728 Matcher::vector_length_in_bytes(n) <= 32 && 7729 !is_integral_type(Matcher::vector_element_basic_type(n))); 7730 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7731 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7732 ins_encode %{ 7733 int vlen_enc = vector_length_encoding(this); 7734 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ 7740 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7741 n->in(2)->bottom_type()->isa_vectmask() == NULL); 7742 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7743 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 7744 effect(TEMP scratch, TEMP ktmp); 7745 ins_encode %{ 7746 int vlen_enc = Assembler::AVX_512bit; 7747 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7748 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 7749 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7750 %} 7751 ins_pipe( pipe_slow ); 7752 %} 7753 7754 7755 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{ 7756 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7757 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7758 VM_Version::supports_avx512bw())); 7759 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7760 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $scratch and k2 as TEMP" %} 7761 effect(TEMP scratch); 7762 ins_encode %{ 7763 int vlen_enc = vector_length_encoding(this); 7764 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7765 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7766 %} 7767 ins_pipe( pipe_slow ); 7768 %} 7769 7770 // --------------------------------- ABS -------------------------------------- 7771 // a = |a| 7772 instruct vabsB_reg(vec dst, vec src) %{ 7773 match(Set dst (AbsVB src)); 7774 ins_cost(450); 7775 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7776 ins_encode %{ 7777 uint vlen = Matcher::vector_length(this); 7778 if (vlen <= 16) { 7779 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7780 } else { 7781 int vlen_enc = vector_length_encoding(this); 7782 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7783 } 7784 %} 7785 ins_pipe( pipe_slow ); 7786 %} 7787 7788 instruct vabsS_reg(vec dst, vec src) %{ 7789 match(Set dst (AbsVS src)); 7790 ins_cost(450); 7791 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7792 ins_encode %{ 7793 uint vlen = Matcher::vector_length(this); 7794 if (vlen <= 8) { 7795 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7796 } else { 7797 int vlen_enc = vector_length_encoding(this); 7798 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7799 } 7800 %} 7801 ins_pipe( pipe_slow ); 7802 %} 7803 7804 instruct vabsI_reg(vec dst, vec src) %{ 7805 match(Set dst (AbsVI src)); 7806 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7807 ins_cost(250); 7808 ins_encode %{ 7809 uint vlen = Matcher::vector_length(this); 7810 if (vlen <= 4) { 7811 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7812 } else { 7813 int vlen_enc = vector_length_encoding(this); 7814 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7815 } 7816 %} 7817 ins_pipe( pipe_slow ); 7818 %} 7819 7820 instruct vabsL_reg(vec dst, vec src) %{ 7821 match(Set dst (AbsVL src)); 7822 ins_cost(450); 7823 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7824 ins_encode %{ 7825 assert(UseAVX > 2, "required"); 7826 int vlen_enc = vector_length_encoding(this); 7827 if (!VM_Version::supports_avx512vl()) { 7828 vlen_enc = Assembler::AVX_512bit; 7829 } 7830 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7831 %} 7832 ins_pipe( pipe_slow ); 7833 %} 7834 7835 // --------------------------------- ABSNEG -------------------------------------- 7836 7837 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7838 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7839 match(Set dst (AbsVF src)); 7840 match(Set dst (NegVF src)); 7841 effect(TEMP scratch); 7842 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7843 ins_cost(150); 7844 ins_encode %{ 7845 int opcode = this->ideal_Opcode(); 7846 int vlen = Matcher::vector_length(this); 7847 if (vlen == 2) { 7848 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7849 } else { 7850 assert(vlen == 8 || vlen == 16, "required"); 7851 int vlen_enc = vector_length_encoding(this); 7852 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7853 } 7854 %} 7855 ins_pipe( pipe_slow ); 7856 %} 7857 7858 instruct vabsneg4F(vec dst, rRegI scratch) %{ 7859 predicate(Matcher::vector_length(n) == 4); 7860 match(Set dst (AbsVF dst)); 7861 match(Set dst (NegVF dst)); 7862 effect(TEMP scratch); 7863 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" 
%} 7864 ins_cost(150); 7865 ins_encode %{ 7866 int opcode = this->ideal_Opcode(); 7867 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7868 %} 7869 ins_pipe( pipe_slow ); 7870 %} 7871 7872 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7873 match(Set dst (AbsVD src)); 7874 match(Set dst (NegVD src)); 7875 effect(TEMP scratch); 7876 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7877 ins_encode %{ 7878 int opcode = this->ideal_Opcode(); 7879 uint vlen = Matcher::vector_length(this); 7880 if (vlen == 2) { 7881 assert(UseSSE >= 2, "required"); 7882 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7883 } else { 7884 int vlen_enc = vector_length_encoding(this); 7885 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7886 } 7887 %} 7888 ins_pipe( pipe_slow ); 7889 %} 7890 7891 //------------------------------------- VectorTest -------------------------------------------- 7892 7893 #ifdef _LP64 7894 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ 7895 predicate(!VM_Version::supports_avx512bwdq() && 7896 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7897 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7898 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7899 match(Set dst (VectorTest src1 src2 )); 7900 effect(TEMP vtmp1, TEMP vtmp2, KILL cr); 7901 format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %} 7902 ins_encode %{ 7903 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7904 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 7905 __ setb(Assembler::carrySet, $dst$$Register); 7906 __ movzbl($dst$$Register, $dst$$Register); 7907 %} 7908 ins_pipe( pipe_slow ); 7909 %} 7910 7911 instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7912 predicate(!VM_Version::supports_avx512bwdq() && 7913 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7914 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7915 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7916 match(Set dst (VectorTest src1 src2 )); 7917 effect(KILL cr); 7918 format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %} 7919 ins_encode %{ 7920 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7921 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7922 __ setb(Assembler::carrySet, $dst$$Register); 7923 __ movzbl($dst$$Register, $dst$$Register); 7924 %} 7925 ins_pipe( pipe_slow ); 7926 %} 7927 7928 instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{ 7929 predicate(VM_Version::supports_avx512bwdq() && 7930 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7931 n->in(1)->bottom_type()->isa_vectmask() && 7932 Matcher::vector_length(n->in(1)) < 8); 7933 match(Set dst (VectorTest src1 src2)); 7934 effect(KILL cr, TEMP kscratch); 7935 format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! 
using $cr as TEMP" %} 7936 ins_encode %{ 7937 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7938 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7939 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7940 uint masklen = Matcher::vector_length(this, $src1); 7941 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister); 7942 %} 7943 ins_pipe( pipe_slow ); 7944 %} 7945 7946 7947 instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 7948 predicate(VM_Version::supports_avx512bwdq() && 7949 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7950 n->in(1)->bottom_type()->isa_vectmask() && 7951 Matcher::vector_length(n->in(1)) >= 8); 7952 match(Set dst (VectorTest src1 src2)); 7953 effect(KILL cr); 7954 format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %} 7955 ins_encode %{ 7956 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7957 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7958 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7959 uint masklen = Matcher::vector_length(this, $src1); 7960 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg); 7961 %} 7962 ins_pipe( pipe_slow ); 7963 %} 7964 7965 7966 instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ 7967 predicate(!VM_Version::supports_avx512bwdq() && 7968 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7969 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7970 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7971 match(Set dst (VectorTest src1 src2 )); 7972 effect(TEMP vtmp, KILL cr); 7973 format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} 7974 ins_encode %{ 7975 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7976 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7977 __ setb(Assembler::notZero, $dst$$Register); 7978 __ movzbl($dst$$Register, $dst$$Register); 7979 %} 7980 ins_pipe( pipe_slow ); 7981 %} 7982 7983 instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7984 predicate(!VM_Version::supports_avx512bwdq() && 7985 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7986 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7987 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7988 match(Set dst (VectorTest src1 src2 )); 7989 effect(KILL cr); 7990 format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %} 7991 ins_encode %{ 7992 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7993 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7994 __ setb(Assembler::notZero, $dst$$Register); 7995 __ movzbl($dst$$Register, $dst$$Register); 7996 %} 7997 ins_pipe( pipe_slow ); 7998 %} 7999 8000 instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 8001 predicate(VM_Version::supports_avx512bwdq() && 8002 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8003 match(Set dst (VectorTest src1 src2)); 8004 effect(KILL cr); 8005 format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t! 
using $cr as TEMP" %} 8006 ins_encode %{ 8007 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 8008 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 8009 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 8010 uint masklen = Matcher::vector_length(this, $src1); 8011 __ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister); 8012 %} 8013 ins_pipe( pipe_slow ); 8014 %} 8015 8016 instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ 8017 predicate(!VM_Version::supports_avx512bwdq() && 8018 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && 8019 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 && 8020 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 8021 match(Set cr (CmpI (VectorTest src1 src2) zero)); 8022 effect(TEMP vtmp); 8023 format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %} 8024 ins_encode %{ 8025 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8026 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 8027 %} 8028 ins_pipe( pipe_slow ); 8029 %} 8030 8031 instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ 8032 predicate(!VM_Version::supports_avx512bwdq() && 8033 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && 8034 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 && 8035 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 8036 match(Set cr (CmpI (VectorTest src1 src2) zero)); 8037 format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %} 8038 ins_encode %{ 8039 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8040 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 8041 %} 8042 ins_pipe( pipe_slow ); 8043 %} 8044 8045 instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{ 8046 predicate(VM_Version::supports_avx512bwdq() && 8047 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 8048 match(Set cr (CmpI (VectorTest src1 src2) zero)); 8049 format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %} 8050 ins_encode %{ 8051 uint masklen = Matcher::vector_length(this, $src1); 8052 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 8053 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 8054 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 8055 masklen = masklen < 8 ? 
8 : masklen; 8056 __ ktest(masklen, $src1$$KRegister, $src2$$KRegister); 8057 %} 8058 ins_pipe( pipe_slow ); 8059 %} 8060 #endif 8061 8062 //------------------------------------- LoadMask -------------------------------------------- 8063 8064 instruct loadMask(legVec dst, legVec src) %{ 8065 predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw()); 8066 match(Set dst (VectorLoadMask src)); 8067 effect(TEMP dst); 8068 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8069 ins_encode %{ 8070 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8071 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8072 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8073 %} 8074 ins_pipe( pipe_slow ); 8075 %} 8076 8077 instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{ 8078 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8079 match(Set dst (VectorLoadMask src)); 8080 effect(TEMP xtmp, TEMP tmp); 8081 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %} 8082 ins_encode %{ 8083 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8084 $tmp$$Register, true, Assembler::AVX_512bit); 8085 %} 8086 ins_pipe( pipe_slow ); 8087 %} 8088 8089 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8090 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8091 match(Set dst (VectorLoadMask src)); 8092 effect(TEMP xtmp); 8093 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8094 ins_encode %{ 8095 int vlen_enc = vector_length_encoding(in(1)); 8096 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8097 noreg, false, vlen_enc); 8098 %} 8099 ins_pipe( pipe_slow ); 8100 %} 8101 8102 //------------------------------------- StoreMask -------------------------------------------- 8103 8104 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8105 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8106 match(Set dst (VectorStoreMask src size)); 8107 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8108 ins_encode %{ 8109 int vlen = Matcher::vector_length(this); 8110 if (vlen <= 16 && UseAVX <= 2) { 8111 assert(UseSSE >= 3, "required"); 8112 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8113 } else { 8114 assert(UseAVX > 0, "required"); 8115 int src_vlen_enc = vector_length_encoding(this, $src); 8116 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8117 } 8118 %} 8119 ins_pipe( pipe_slow ); 8120 %} 8121 8122 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8123 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8124 match(Set dst (VectorStoreMask src size)); 8125 effect(TEMP_DEF dst, TEMP xtmp); 8126 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8127 ins_encode %{ 8128 int vlen_enc = Assembler::AVX_128bit; 8129 int vlen = Matcher::vector_length(this); 8130 if (vlen <= 8) { 8131 assert(UseSSE >= 3, "required"); 8132 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8133 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8134 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8135 } else { 8136 assert(UseAVX > 0, "required"); 8137 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8138 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8139 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8140 } 8141 %} 8142 ins_pipe( pipe_slow ); 8143 %} 8144 8145 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8146 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8147 match(Set dst (VectorStoreMask src size)); 8148 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8149 effect(TEMP_DEF dst, TEMP xtmp); 8150 ins_encode %{ 8151 int vlen_enc = Assembler::AVX_128bit; 8152 int vlen = Matcher::vector_length(this); 8153 if (vlen <= 4) { 8154 assert(UseSSE >= 3, "required"); 8155 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8156 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8157 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8158 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8159 } else { 8160 assert(UseAVX > 0, "required"); 8161 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8162 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8163 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8164 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8165 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8166 } 8167 %} 8168 ins_pipe( pipe_slow ); 8169 %} 8170 8171 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8172 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8173 match(Set dst (VectorStoreMask src size)); 8174 effect(TEMP_DEF dst, TEMP xtmp); 8175 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8176 ins_encode %{ 8177 assert(UseSSE >= 3, "required"); 8178 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8179 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8180 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8181 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8182 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8183 %} 8184 ins_pipe( pipe_slow ); 8185 %} 8186 8187 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8188 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8189 match(Set dst (VectorStoreMask src size)); 8190 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} 8191 effect(TEMP_DEF dst, TEMP vtmp); 8192 ins_encode %{ 8193 int vlen_enc = Assembler::AVX_128bit; 8194 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8195 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8196 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8197 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8198 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8199 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8200 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8201 %} 8202 ins_pipe( pipe_slow ); 8203 %} 8204 8205 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8206 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8207 match(Set dst (VectorStoreMask src size)); 8208 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8209 ins_encode %{ 8210 int src_vlen_enc = vector_length_encoding(this, $src); 8211 int dst_vlen_enc = vector_length_encoding(this); 8212 if (!VM_Version::supports_avx512vl()) { 8213 src_vlen_enc = Assembler::AVX_512bit; 8214 } 8215 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8216 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8217 %} 8218 ins_pipe( pipe_slow ); 8219 %} 8220 8221 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8222 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8223 match(Set dst (VectorStoreMask src size)); 8224 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8225 ins_encode %{ 8226 int src_vlen_enc = vector_length_encoding(this, $src); 8227 int dst_vlen_enc = vector_length_encoding(this); 8228 if (!VM_Version::supports_avx512vl()) { 8229 src_vlen_enc = Assembler::AVX_512bit; 8230 } 8231 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8232 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{ 8238 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8239 match(Set dst (VectorStoreMask mask size)); 8240 effect(TEMP_DEF dst, TEMP tmp); 8241 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8242 ins_encode %{ 8243 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8244 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8245 false, Assembler::AVX_512bit, $tmp$$Register); 8246 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8247 %} 8248 ins_pipe( pipe_slow ); 8249 %} 8250 8251 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8252 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8253 match(Set dst (VectorStoreMask mask size)); 8254 effect(TEMP_DEF dst); 8255 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8256 ins_encode %{ 8257 int dst_vlen_enc = vector_length_encoding(this); 8258 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8259 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8260 %} 8261 ins_pipe( pipe_slow ); 8262 %} 8263 8264 instruct vmaskcast_evex(kReg dst) %{ 8265 predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); 8266 match(Set dst (VectorMaskCast dst)); 8267 ins_cost(0); 8268 format %{ "vector_mask_cast $dst" %} 8269 ins_encode %{ 8270 // empty 8271 %} 8272 ins_pipe(empty); 8273 %} 8274 8275 instruct vmaskcast(vec dst) %{ 8276 predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) && 8277 (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)))); 8278 match(Set dst (VectorMaskCast dst)); 8279 ins_cost(0); 8280 format %{ "vector_mask_cast $dst" %} 8281 ins_encode %{ 8282 // empty 8283 %} 8284 ins_pipe(empty); 8285 %} 8286 8287 //-------------------------------- Load Iota Indices ---------------------------------- 8288 8289 instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ 8290 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8291 match(Set dst (VectorLoadConst src)); 8292 effect(TEMP scratch); 8293 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8294 ins_encode %{ 8295 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8296 __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); 8297 %} 8298 ins_pipe( pipe_slow ); 8299 %} 8300 8301 //-------------------------------- Rearrange ---------------------------------- 8302 8303 // LoadShuffle/Rearrange for Byte 8304 8305 instruct loadShuffleB(vec dst) %{ 8306 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8307 match(Set dst (VectorLoadShuffle dst)); 8308 format %{ "vector_load_shuffle $dst, $dst" %} 8309 ins_encode %{ 8310 // empty 8311 %} 8312 ins_pipe( pipe_slow ); 8313 %} 8314 8315 instruct rearrangeB(vec dst, vec shuffle) %{ 8316 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8317 Matcher::vector_length(n) < 32); 8318 match(Set dst (VectorRearrange dst shuffle)); 8319 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8320 ins_encode %{ 8321 assert(UseSSE >= 4, "required"); 8322 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8323 %} 8324 ins_pipe( pipe_slow ); 8325 %} 8326 8327 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8328 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8329 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8330 match(Set dst (VectorRearrange src shuffle)); 8331 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8332 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2, $scratch as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");
    // Swap src into vtmp1
    __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1);
    // Shuffle swapped src to get entries from other 128 bit lane
    __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Shuffle original src to get entries from self 128 bit lane
    __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit);
    // Create a blend mask by setting high bits for entries coming from other lane in shuffle
    __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register);
    // Perform the blend
    __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{
  predicate(Matcher::vector_element_basic_type(n) == T_BYTE &&
            Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi());
  match(Set dst (VectorRearrange src shuffle));
  format %{ "vector_rearrange $dst, $shuffle, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// LoadShuffle/Rearrange for Short

instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{
  predicate(Matcher::vector_element_basic_type(n) == T_SHORT &&
            Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS
  match(Set dst (VectorLoadShuffle src));
  effect(TEMP dst, TEMP vtmp, TEMP scratch);
  format %{ "vector_load_shuffle $dst, $src\t!
using $vtmp and $scratch as TEMP" %} 8369 ins_encode %{ 8370 // Create a byte shuffle mask from short shuffle mask 8371 // only byte shuffle instruction available on these platforms 8372 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8373 if (UseAVX == 0) { 8374 assert(vlen_in_bytes <= 16, "required"); 8375 // Multiply each shuffle by two to get byte index 8376 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8377 __ psllw($vtmp$$XMMRegister, 1); 8378 8379 // Duplicate to create 2 copies of byte index 8380 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8381 __ psllw($dst$$XMMRegister, 8); 8382 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8383 8384 // Add one to get alternate byte index 8385 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); 8386 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8387 } else { 8388 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8389 int vlen_enc = vector_length_encoding(this); 8390 // Multiply each shuffle by two to get byte index 8391 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8392 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8393 8394 // Duplicate to create 2 copies of byte index 8395 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8396 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8397 8398 // Add one to get alternate byte index 8399 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register); 8400 } 8401 %} 8402 ins_pipe( pipe_slow ); 8403 %} 8404 8405 instruct rearrangeS(vec dst, vec shuffle) %{ 8406 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8407 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8408 match(Set dst (VectorRearrange dst shuffle)); 8409 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8410 ins_encode %{ 8411 assert(UseSSE >= 4, "required"); 8412 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8413 %} 8414 ins_pipe( pipe_slow ); 8415 %} 8416 8417 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8418 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8419 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8420 match(Set dst (VectorRearrange src shuffle)); 8421 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8422 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2, $scratch as TEMP" %} 8423 ins_encode %{ 8424 assert(UseAVX >= 2, "required"); 8425 // Swap src into vtmp1 8426 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8427 // Shuffle swapped src to get entries from other 128 bit lane 8428 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8429 // Shuffle original src to get entries from self 128 bit lane 8430 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8431 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8432 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); 8433 // Perform the blend 8434 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8435 %} 8436 ins_pipe( pipe_slow ); 8437 %} 8438 8439 instruct loadShuffleS_evex(vec dst, vec src) %{ 8440 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8441 VM_Version::supports_avx512bw()); 8442 match(Set dst (VectorLoadShuffle src)); 8443 format %{ "vector_load_shuffle $dst, $src" %} 8444 ins_encode %{ 8445 int vlen_enc = vector_length_encoding(this); 8446 if (!VM_Version::supports_avx512vl()) { 8447 vlen_enc = Assembler::AVX_512bit; 8448 } 8449 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8450 %} 8451 ins_pipe( pipe_slow ); 8452 %} 8453 8454 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8455 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8456 VM_Version::supports_avx512bw()); 8457 match(Set dst (VectorRearrange src shuffle)); 8458 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8459 ins_encode %{ 8460 int vlen_enc = vector_length_encoding(this); 8461 if (!VM_Version::supports_avx512vl()) { 8462 vlen_enc = Assembler::AVX_512bit; 8463 } 8464 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8465 %} 8466 ins_pipe( pipe_slow ); 8467 %} 8468 8469 // LoadShuffle/Rearrange for Integer and Float 8470 8471 instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8472 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8473 Matcher::vector_length(n) == 4 && UseAVX < 2); 8474 match(Set dst (VectorLoadShuffle src)); 8475 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8476 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} 8477 ins_encode %{ 8478 assert(UseSSE >= 4, "required"); 8479 8480 // Create a byte shuffle mask from int shuffle mask 8481 // only byte shuffle instruction available on these platforms 8482 8483 // Duplicate and multiply each shuffle by 4 8484 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8485 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8486 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8487 __ psllw($vtmp$$XMMRegister, 2); 8488 8489 // Duplicate again to create 4 copies of byte index 8490 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8491 __ psllw($dst$$XMMRegister, 8); 8492 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8493 8494 // Add 3,2,1,0 to get alternate byte index 8495 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); 8496 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8497 %} 8498 ins_pipe( pipe_slow ); 8499 %} 8500 8501 instruct rearrangeI(vec dst, vec shuffle) %{ 8502 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8503 Matcher::vector_length(n) == 4 && UseAVX < 2); 8504 match(Set dst (VectorRearrange dst shuffle)); 8505 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8506 ins_encode %{ 8507 assert(UseSSE >= 4, "required"); 8508 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8509 %} 8510 ins_pipe( pipe_slow ); 8511 %} 8512 8513 instruct loadShuffleI_avx(vec dst, vec src) %{ 8514 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8515 UseAVX >= 2); 8516 match(Set dst (VectorLoadShuffle src)); 8517 format %{ "vector_load_shuffle $dst, $src" %} 8518 ins_encode %{ 8519 int vlen_enc = vector_length_encoding(this); 8520 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8521 %} 8522 ins_pipe( pipe_slow ); 8523 %} 8524 8525 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8526 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8527 UseAVX >= 2); 8528 match(Set dst (VectorRearrange src shuffle)); 8529 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8530 ins_encode %{ 8531 int vlen_enc = vector_length_encoding(this); 8532 if (vlen_enc == Assembler::AVX_128bit) { 8533 vlen_enc = Assembler::AVX_256bit; 8534 } 8535 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8536 %} 8537 ins_pipe( pipe_slow ); 8538 %} 8539 8540 // LoadShuffle/Rearrange for Long and Double 8541 8542 instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8543 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8544 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8545 match(Set dst (VectorLoadShuffle src)); 8546 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8547 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} 8548 ins_encode %{ 8549 assert(UseAVX >= 2, "required"); 8550 8551 int vlen_enc = vector_length_encoding(this); 8552 // Create a double word shuffle mask from long shuffle mask 8553 // only double word shuffle instruction available on these platforms 8554 8555 // Multiply each shuffle by two to get double word index 8556 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8557 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8558 8559 // Duplicate each double word shuffle 8560 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8561 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8562 8563 // Add one to get alternate double word index 8564 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); 8565 %} 8566 ins_pipe( pipe_slow ); 8567 %} 8568 8569 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8570 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8571 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8572 match(Set dst (VectorRearrange src shuffle)); 8573 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8574 ins_encode %{ 8575 assert(UseAVX >= 2, "required"); 8576 8577 int vlen_enc = vector_length_encoding(this); 8578 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8579 %} 8580 ins_pipe( pipe_slow ); 8581 %} 8582 8583 instruct loadShuffleL_evex(vec dst, vec src) %{ 8584 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8585 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8586 match(Set dst (VectorLoadShuffle src)); 8587 format %{ "vector_load_shuffle $dst, $src" %} 8588 ins_encode %{ 8589 assert(UseAVX > 2, "required"); 8590 8591 int vlen_enc = vector_length_encoding(this); 8592 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8593 %} 8594 ins_pipe( pipe_slow ); 8595 %} 8596 8597 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8598 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8599 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8600 match(Set dst (VectorRearrange src shuffle)); 8601 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8602 ins_encode %{ 8603 assert(UseAVX > 2, "required"); 8604 8605 int vlen_enc = vector_length_encoding(this); 8606 if (vlen_enc == Assembler::AVX_128bit) { 8607 vlen_enc = Assembler::AVX_256bit; 8608 } 8609 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8610 %} 8611 ins_pipe( pipe_slow ); 8612 %} 8613 8614 // --------------------------------- FMA -------------------------------------- 8615 // a * b + c 8616 8617 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8618 match(Set c (FmaVF c (Binary a b))); 8619 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8620 ins_cost(150); 8621 ins_encode %{ 8622 assert(UseFMA, "not enabled"); 8623 int vlen_enc = vector_length_encoding(this); 8624 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8625 %} 8626 ins_pipe( pipe_slow ); 8627 %} 8628 8629 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8630 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8631 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8632 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8633 ins_cost(150); 8634 ins_encode %{ 
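    // Note: this rule matches the (LoadVector b) input directly, so $b is handed to the
    // vfmaf() assembler helper as an Address (memory operand) rather than an XMMRegister,
    // letting the vector load be folded into the FMA instead of needing a separate load.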
8635 assert(UseFMA, "not enabled"); 8636 int vlen_enc = vector_length_encoding(this); 8637 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8638 %} 8639 ins_pipe( pipe_slow ); 8640 %} 8641 8642 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8643 match(Set c (FmaVD c (Binary a b))); 8644 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8645 ins_cost(150); 8646 ins_encode %{ 8647 assert(UseFMA, "not enabled"); 8648 int vlen_enc = vector_length_encoding(this); 8649 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8650 %} 8651 ins_pipe( pipe_slow ); 8652 %} 8653 8654 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8655 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8656 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8657 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8658 ins_cost(150); 8659 ins_encode %{ 8660 assert(UseFMA, "not enabled"); 8661 int vlen_enc = vector_length_encoding(this); 8662 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8663 %} 8664 ins_pipe( pipe_slow ); 8665 %} 8666 8667 // --------------------------------- Vector Multiply Add -------------------------------------- 8668 8669 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8670 predicate(UseAVX == 0); 8671 match(Set dst (MulAddVS2VI dst src1)); 8672 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8673 ins_encode %{ 8674 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8675 %} 8676 ins_pipe( pipe_slow ); 8677 %} 8678 8679 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8680 predicate(UseAVX > 0); 8681 match(Set dst (MulAddVS2VI src1 src2)); 8682 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8683 ins_encode %{ 8684 int vlen_enc = vector_length_encoding(this); 8685 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8686 %} 8687 ins_pipe( pipe_slow ); 8688 %} 8689 8690 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8691 8692 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8693 predicate(VM_Version::supports_avx512_vnni()); 8694 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8695 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 8696 ins_encode %{ 8697 assert(UseAVX > 2, "required"); 8698 int vlen_enc = vector_length_encoding(this); 8699 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8700 %} 8701 ins_pipe( pipe_slow ); 8702 ins_cost(10); 8703 %} 8704 8705 // --------------------------------- PopCount -------------------------------------- 8706 8707 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 8708 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8709 match(Set dst (PopCountVI src)); 8710 match(Set dst (PopCountVL src)); 8711 ins_cost(400); 8712 format %{ "vector_popcount_integral $dst, $src" %} 8713 ins_encode %{ 8714 int opcode = this->ideal_Opcode(); 8715 int vlen_enc = vector_length_encoding(this, $src); 8716 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8717 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 8718 // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL 8719 // should be succeeded by its corresponding vector IR and following 8720 // special handling should be removed. 
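    // Narrowing note: VPMOVQD truncates each 64-bit lane to its low 32 bits. A
    // per-lane population count is at most 64, so the truncation below is
    // lossless and simply repacks the long results as ints in the lower half of $dst.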
8721 if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) { 8722 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8723 } 8724 %} 8725 ins_pipe( pipe_slow ); 8726 %} 8727 8728 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 8729 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8730 match(Set dst (PopCountVI src mask)); 8731 match(Set dst (PopCountVL src mask)); 8732 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 8733 ins_encode %{ 8734 int vlen_enc = vector_length_encoding(this, $src); 8735 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8736 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8737 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 8738 %} 8739 ins_pipe( pipe_slow ); 8740 %} 8741 8742 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 8743 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8744 match(Set dst (PopCountVI src)); 8745 match(Set dst (PopCountVL src)); 8746 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 8747 format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 8748 ins_encode %{ 8749 int opcode = this->ideal_Opcode(); 8750 int vlen_enc = vector_length_encoding(this, $src); 8751 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8752 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8753 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 8754 // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL 8755 // should be succeeded by its corresponding vector IR and following 8756 // special handling should be removed. 8757 if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) { 8758 if (VM_Version::supports_avx512vl()) { 8759 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8760 } else { 8761 assert(VM_Version::supports_avx2(), ""); 8762 __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 8763 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 8764 } 8765 } 8766 %} 8767 ins_pipe( pipe_slow ); 8768 %} 8769 8770 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 8771 8772 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 8773 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 8774 Matcher::vector_length_in_bytes(n->in(1)))); 8775 match(Set dst (CountTrailingZerosV src)); 8776 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 8777 ins_cost(400); 8778 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 8779 ins_encode %{ 8780 int vlen_enc = vector_length_encoding(this, $src); 8781 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8782 BasicType rbt = Matcher::vector_element_basic_type(this); 8783 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 8784 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8785 // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV 8786 // should be succeeded by its corresponding vector IR and following 8787 // special handling should be removed. 
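    // As with PopCountVL above, a 64-bit trailing-zero count never exceeds 64, so
    // narrowing the quadword lanes to ints with VPMOVQD below loses no information.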
8788 if (bt == T_LONG && rbt == T_INT) { 8789 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8790 } 8791 %} 8792 ins_pipe( pipe_slow ); 8793 %} 8794 8795 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8796 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 8797 VM_Version::supports_avx512cd() && 8798 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 8799 match(Set dst (CountTrailingZerosV src)); 8800 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 8801 ins_cost(400); 8802 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 8803 ins_encode %{ 8804 int vlen_enc = vector_length_encoding(this, $src); 8805 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8806 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8807 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8808 %} 8809 ins_pipe( pipe_slow ); 8810 %} 8811 8812 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 8813 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 8814 match(Set dst (CountTrailingZerosV src)); 8815 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 8816 ins_cost(400); 8817 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 8818 ins_encode %{ 8819 int vlen_enc = vector_length_encoding(this, $src); 8820 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8821 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8822 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 8823 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 8824 %} 8825 ins_pipe( pipe_slow ); 8826 %} 8827 8828 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8829 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 8830 match(Set dst (CountTrailingZerosV src)); 8831 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 8832 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8833 ins_encode %{ 8834 int vlen_enc = vector_length_encoding(this, $src); 8835 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8836 BasicType rbt = Matcher::vector_element_basic_type(this); 8837 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8838 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8839 // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL 8840 // should be succeeded by its corresponding vector IR and following 8841 // special handling should be removed. 
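    // On this AVX path VPMOVQD is only usable when AVX512VL is present; otherwise
    // the VPSHUFD/VPERMQ pair below gathers the low dword of every quadword lane,
    // which has the same net effect of packing the int results into the low half of $dst.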
8842 if (bt == T_LONG && rbt == T_INT) { 8843 if (VM_Version::supports_avx512vl()) { 8844 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8845 } else { 8846 assert(VM_Version::supports_avx2(), ""); 8847 __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 8848 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 8849 } 8850 } 8851 %} 8852 ins_pipe( pipe_slow ); 8853 %} 8854 8855 8856 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 8857 8858 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 8859 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 8860 effect(TEMP dst); 8861 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8862 ins_encode %{ 8863 int vector_len = vector_length_encoding(this); 8864 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8865 %} 8866 ins_pipe( pipe_slow ); 8867 %} 8868 8869 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8870 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 8871 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8872 effect(TEMP dst); 8873 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8874 ins_encode %{ 8875 int vector_len = vector_length_encoding(this); 8876 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8877 %} 8878 ins_pipe( pipe_slow ); 8879 %} 8880 8881 // --------------------------------- Rotation Operations ---------------------------------- 8882 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8883 match(Set dst (RotateLeftV src shift)); 8884 match(Set dst (RotateRightV src shift)); 8885 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 8886 ins_encode %{ 8887 int opcode = this->ideal_Opcode(); 8888 int vector_len = vector_length_encoding(this); 8889 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8890 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 8891 %} 8892 ins_pipe( pipe_slow ); 8893 %} 8894 8895 instruct vprorate(vec dst, vec src, vec shift) %{ 8896 match(Set dst (RotateLeftV src shift)); 8897 match(Set dst (RotateRightV src shift)); 8898 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 8899 ins_encode %{ 8900 int opcode = this->ideal_Opcode(); 8901 int vector_len = vector_length_encoding(this); 8902 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8903 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8904 %} 8905 ins_pipe( pipe_slow ); 8906 %} 8907 8908 #ifdef _LP64 8909 // ---------------------------------- Masked Operations ------------------------------------ 8910 8911 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8912 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 8913 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 8914 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 8915 ins_encode %{ 8916 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 8917 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 8918 8919 Label DONE; 8920 int vlen_enc = vector_length_encoding(this, $src1); 8921 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 8922 8923 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 8924 __ mov64($dst$$Register, -1L); 8925 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 8926 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 8927 __ jccb(Assembler::carrySet, DONE); 8928 __ kmovql($dst$$Register, $ktmp1$$KRegister); 8929 __ notq($dst$$Register); 8930 __ tzcntq($dst$$Register, $dst$$Register); 8931 __ bind(DONE); 8932 %} 8933 ins_pipe( pipe_slow ); 8934 %} 8935 8936 8937 instruct vmasked_load64(vec dst, memory mem, kReg mask) %{ 8938 match(Set dst (LoadVectorMasked mem mask)); 8939 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8940 ins_encode %{ 8941 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8942 int vector_len = vector_length_encoding(this); 8943 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len); 8944 %} 8945 ins_pipe( pipe_slow ); 8946 %} 8947 8948 instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 8949 match(Set dst (VectorMaskGen len)); 8950 effect(TEMP temp); 8951 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 8952 ins_encode %{ 8953 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 8954 %} 8955 ins_pipe( pipe_slow ); 8956 %} 8957 8958 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 8959 match(Set dst (VectorMaskGen len)); 8960 format %{ "vector_mask_gen $len \t! vector mask generator" %} 8961 effect(TEMP temp); 8962 ins_encode %{ 8963 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 8964 __ kmovql($dst$$KRegister, $temp$$Register); 8965 %} 8966 ins_pipe( pipe_slow ); 8967 %} 8968 8969 instruct vmasked_store64(memory mem, vec src, kReg mask) %{ 8970 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8971 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8972 ins_encode %{ 8973 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8974 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8975 int vector_len = vector_length_encoding(src_node); 8976 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len); 8977 %} 8978 ins_pipe( pipe_slow ); 8979 %} 8980 8981 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 8982 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8983 match(Set dst (VectorMaskToLong mask)); 8984 effect(TEMP dst, KILL cr); 8985 format %{ "vector_tolong_evex $dst, $mask \t! 
vector mask tolong" %} 8986 ins_encode %{ 8987 int opcode = this->ideal_Opcode(); 8988 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8989 int mask_len = Matcher::vector_length(this, $mask); 8990 int mask_size = mask_len * type2aelembytes(mbt); 8991 int vlen_enc = vector_length_encoding(this, $mask); 8992 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8993 $dst$$Register, mask_len, mask_size, vlen_enc); 8994 %} 8995 ins_pipe( pipe_slow ); 8996 %} 8997 8998 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 8999 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9000 match(Set dst (VectorMaskToLong mask)); 9001 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9002 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9003 ins_encode %{ 9004 int opcode = this->ideal_Opcode(); 9005 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9006 int mask_len = Matcher::vector_length(this, $mask); 9007 int vlen_enc = vector_length_encoding(this, $mask); 9008 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9009 $dst$$Register, mask_len, mbt, vlen_enc); 9010 %} 9011 ins_pipe( pipe_slow ); 9012 %} 9013 9014 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9015 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9016 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9017 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %} 9018 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9019 ins_encode %{ 9020 int opcode = this->ideal_Opcode(); 9021 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9022 int mask_len = Matcher::vector_length(this, $mask); 9023 int vlen_enc = vector_length_encoding(this, $mask); 9024 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9025 $dst$$Register, mask_len, mbt, vlen_enc); 9026 %} 9027 ins_pipe( pipe_slow ); 9028 %} 9029 9030 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9031 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9032 match(Set dst (VectorMaskTrueCount mask)); 9033 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9034 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9035 ins_encode %{ 9036 int opcode = this->ideal_Opcode(); 9037 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9038 int mask_len = Matcher::vector_length(this, $mask); 9039 int mask_size = mask_len * type2aelembytes(mbt); 9040 int vlen_enc = vector_length_encoding(this, $mask); 9041 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9042 $tmp$$Register, mask_len, mask_size, vlen_enc); 9043 %} 9044 ins_pipe( pipe_slow ); 9045 %} 9046 9047 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9048 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9049 match(Set dst (VectorMaskTrueCount mask)); 9050 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9051 format %{ "vector_truecount_bool $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9052 ins_encode %{ 9053 int opcode = this->ideal_Opcode(); 9054 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9055 int mask_len = Matcher::vector_length(this, $mask); 9056 int vlen_enc = vector_length_encoding(this, $mask); 9057 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9058 $tmp$$Register, mask_len, mbt, vlen_enc); 9059 %} 9060 ins_pipe( pipe_slow ); 9061 %} 9062 9063 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9064 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9065 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9066 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9067 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9068 ins_encode %{ 9069 int opcode = this->ideal_Opcode(); 9070 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9071 int mask_len = Matcher::vector_length(this, $mask); 9072 int vlen_enc = vector_length_encoding(this, $mask); 9073 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9074 $tmp$$Register, mask_len, mbt, vlen_enc); 9075 %} 9076 ins_pipe( pipe_slow ); 9077 %} 9078 9079 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9080 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9081 match(Set dst (VectorMaskFirstTrue mask)); 9082 match(Set dst (VectorMaskLastTrue mask)); 9083 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9084 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %} 9085 ins_encode %{ 9086 int opcode = this->ideal_Opcode(); 9087 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9088 int mask_len = Matcher::vector_length(this, $mask); 9089 int mask_size = mask_len * type2aelembytes(mbt); 9090 int vlen_enc = vector_length_encoding(this, $mask); 9091 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9092 $tmp$$Register, mask_len, mask_size, vlen_enc); 9093 %} 9094 ins_pipe( pipe_slow ); 9095 %} 9096 9097 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9098 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9099 match(Set dst (VectorMaskFirstTrue mask)); 9100 match(Set dst (VectorMaskLastTrue mask)); 9101 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9102 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9103 ins_encode %{ 9104 int opcode = this->ideal_Opcode(); 9105 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9106 int mask_len = Matcher::vector_length(this, $mask); 9107 int vlen_enc = vector_length_encoding(this, $mask); 9108 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9109 $tmp$$Register, mask_len, mbt, vlen_enc); 9110 %} 9111 ins_pipe( pipe_slow ); 9112 %} 9113 9114 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9115 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9116 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9117 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9118 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9119 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 9120 ins_encode %{ 9121 int opcode = this->ideal_Opcode(); 9122 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9123 int mask_len = Matcher::vector_length(this, $mask); 9124 int vlen_enc = vector_length_encoding(this, $mask); 9125 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9126 $tmp$$Register, mask_len, mbt, vlen_enc); 9127 %} 9128 ins_pipe( pipe_slow ); 9129 %} 9130 9131 // --------------------------------- Compress/Expand Operations --------------------------- 9132 9133 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9134 match(Set dst (CompressV src mask)); 9135 match(Set dst (ExpandV src mask)); 9136 format %{ "vector_compress_expand $dst, $src, $mask" %} 9137 ins_encode %{ 9138 int opcode = this->ideal_Opcode(); 9139 int vector_len = vector_length_encoding(this); 9140 BasicType bt = Matcher::vector_element_basic_type(this); 9141 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9142 %} 9143 ins_pipe( pipe_slow ); 9144 %} 9145 9146 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9147 match(Set dst (CompressM mask)); 9148 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9149 format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %} 9150 ins_encode %{ 9151 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9152 int mask_len = Matcher::vector_length(this); 9153 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9154 %} 9155 ins_pipe( pipe_slow ); 9156 %} 9157 9158 #endif // _LP64 9159 9160 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9161 9162 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9163 predicate(!VM_Version::supports_gfni()); 9164 match(Set dst (ReverseV src)); 9165 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9166 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9167 ins_encode %{ 9168 int vec_enc = vector_length_encoding(this); 9169 BasicType bt = Matcher::vector_element_basic_type(this); 9170 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9171 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9172 %} 9173 ins_pipe( pipe_slow ); 9174 %} 9175 9176 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{ 9177 predicate(VM_Version::supports_gfni()); 9178 match(Set dst (ReverseV src)); 9179 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 9180 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $rtmp and $xtmp as TEMP" %} 9181 ins_encode %{ 9182 int vec_enc = vector_length_encoding(this); 9183 BasicType bt = Matcher::vector_element_basic_type(this); 9184 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9185 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 9186 addr, $rtmp$$Register, vec_enc); 9187 %} 9188 ins_pipe( pipe_slow ); 9189 %} 9190 9191 instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{ 9192 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9193 match(Set dst (ReverseBytesV src)); 9194 effect(TEMP dst, TEMP rtmp); 9195 format %{ "vector_reverse_byte $dst, $src!\t using $rtmp as TEMP" %} 9196 ins_encode %{ 9197 int vec_enc = vector_length_encoding(this); 9198 BasicType bt 
= Matcher::vector_element_basic_type(this); 9199 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc); 9200 %} 9201 ins_pipe( pipe_slow ); 9202 %} 9203 9204 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9205 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9206 match(Set dst (ReverseBytesV src)); 9207 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9208 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9209 ins_encode %{ 9210 int vec_enc = vector_length_encoding(this); 9211 BasicType bt = Matcher::vector_element_basic_type(this); 9212 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9213 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9214 %} 9215 ins_pipe( pipe_slow ); 9216 %} 9217 9218 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9219 9220 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9221 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9222 Matcher::vector_length_in_bytes(n->in(1)))); 9223 match(Set dst (CountLeadingZerosV src)); 9224 format %{ "vector_count_leading_zeros $dst, $src" %} 9225 ins_encode %{ 9226 int vlen_enc = vector_length_encoding(this, $src); 9227 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9228 BasicType rbt = Matcher::vector_element_basic_type(this); 9229 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9230 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9231 // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV 9232 // should be succeeded by its corresponding vector IR and following 9233 // special handling should be removed. 
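    // Same narrowing pattern as the popcount rules: a 64-bit leading-zero count
    // fits in an int, so the VPMOVQD truncation below is safe when the enclosing
    // IR consumes the result as T_INT.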
9234 if (rbt == T_INT && bt == T_LONG) { 9235 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 9236 } 9237 %} 9238 ins_pipe( pipe_slow ); 9239 %} 9240 9241 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9242 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9243 Matcher::vector_length_in_bytes(n->in(1)))); 9244 match(Set dst (CountLeadingZerosV src mask)); 9245 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9246 ins_encode %{ 9247 int vlen_enc = vector_length_encoding(this, $src); 9248 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9249 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9250 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9251 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9252 %} 9253 ins_pipe( pipe_slow ); 9254 %} 9255 9256 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9257 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9258 VM_Version::supports_avx512cd() && 9259 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9260 match(Set dst (CountLeadingZerosV src)); 9261 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9262 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9263 ins_encode %{ 9264 int vlen_enc = vector_length_encoding(this, $src); 9265 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9266 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9267 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9268 %} 9269 ins_pipe( pipe_slow ); 9270 %} 9271 9272 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9273 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9274 match(Set dst (CountLeadingZerosV src)); 9275 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9276 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9277 ins_encode %{ 9278 int vlen_enc = vector_length_encoding(this, $src); 9279 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9280 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9281 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9282 $rtmp$$Register, true, vlen_enc); 9283 %} 9284 ins_pipe( pipe_slow ); 9285 %} 9286 9287 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9288 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9289 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9290 match(Set dst (CountLeadingZerosV src)); 9291 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9292 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9293 ins_encode %{ 9294 int vlen_enc = vector_length_encoding(this, $src); 9295 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9296 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9297 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9298 %} 9299 ins_pipe( pipe_slow ); 9300 %} 9301 9302 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9303 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9304 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9305 match(Set dst (CountLeadingZerosV src)); 9306 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9307 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9308 ins_encode %{ 9309 int vlen_enc = vector_length_encoding(this, $src); 9310 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9311 BasicType rbt = Matcher::vector_element_basic_type(this); 9312 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9313 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9314 // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV 9315 // should be succeeded by its corresponding vector IR and following 9316 // special handling should be removed. 9317 if (rbt == T_INT && bt == T_LONG) { 9318 __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 9319 } 9320 %} 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 // ---------------------------------- Vector Masked Operations ------------------------------------ 9325 9326 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9327 match(Set dst (AddVB (Binary dst src2) mask)); 9328 match(Set dst (AddVS (Binary dst src2) mask)); 9329 match(Set dst (AddVI (Binary dst src2) mask)); 9330 match(Set dst (AddVL (Binary dst src2) mask)); 9331 match(Set dst (AddVF (Binary dst src2) mask)); 9332 match(Set dst (AddVD (Binary dst src2) mask)); 9333 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9334 ins_encode %{ 9335 int vlen_enc = vector_length_encoding(this); 9336 BasicType bt = Matcher::vector_element_basic_type(this); 9337 int opc = this->ideal_Opcode(); 9338 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9339 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9340 %} 9341 ins_pipe( pipe_slow ); 9342 %} 9343 9344 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9345 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9346 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9347 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9348 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9349 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9350 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9351 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! 
add masked operation" %} 9352 ins_encode %{ 9353 int vlen_enc = vector_length_encoding(this); 9354 BasicType bt = Matcher::vector_element_basic_type(this); 9355 int opc = this->ideal_Opcode(); 9356 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9357 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9358 %} 9359 ins_pipe( pipe_slow ); 9360 %} 9361 9362 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9363 match(Set dst (XorV (Binary dst src2) mask)); 9364 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9365 ins_encode %{ 9366 int vlen_enc = vector_length_encoding(this); 9367 BasicType bt = Matcher::vector_element_basic_type(this); 9368 int opc = this->ideal_Opcode(); 9369 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9370 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9371 %} 9372 ins_pipe( pipe_slow ); 9373 %} 9374 9375 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9376 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9377 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9378 ins_encode %{ 9379 int vlen_enc = vector_length_encoding(this); 9380 BasicType bt = Matcher::vector_element_basic_type(this); 9381 int opc = this->ideal_Opcode(); 9382 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9383 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9389 match(Set dst (OrV (Binary dst src2) mask)); 9390 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9391 ins_encode %{ 9392 int vlen_enc = vector_length_encoding(this); 9393 BasicType bt = Matcher::vector_element_basic_type(this); 9394 int opc = this->ideal_Opcode(); 9395 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9396 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9397 %} 9398 ins_pipe( pipe_slow ); 9399 %} 9400 9401 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9402 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9403 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9404 ins_encode %{ 9405 int vlen_enc = vector_length_encoding(this); 9406 BasicType bt = Matcher::vector_element_basic_type(this); 9407 int opc = this->ideal_Opcode(); 9408 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9409 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9410 %} 9411 ins_pipe( pipe_slow ); 9412 %} 9413 9414 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9415 match(Set dst (AndV (Binary dst src2) mask)); 9416 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9417 ins_encode %{ 9418 int vlen_enc = vector_length_encoding(this); 9419 BasicType bt = Matcher::vector_element_basic_type(this); 9420 int opc = this->ideal_Opcode(); 9421 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9422 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9423 %} 9424 ins_pipe( pipe_slow ); 9425 %} 9426 9427 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9428 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9429 format %{ "vand_masked $dst, $dst, $src2, $mask\t! 
and masked operation" %} 9430 ins_encode %{ 9431 int vlen_enc = vector_length_encoding(this); 9432 BasicType bt = Matcher::vector_element_basic_type(this); 9433 int opc = this->ideal_Opcode(); 9434 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9435 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9436 %} 9437 ins_pipe( pipe_slow ); 9438 %} 9439 9440 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9441 match(Set dst (SubVB (Binary dst src2) mask)); 9442 match(Set dst (SubVS (Binary dst src2) mask)); 9443 match(Set dst (SubVI (Binary dst src2) mask)); 9444 match(Set dst (SubVL (Binary dst src2) mask)); 9445 match(Set dst (SubVF (Binary dst src2) mask)); 9446 match(Set dst (SubVD (Binary dst src2) mask)); 9447 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9448 ins_encode %{ 9449 int vlen_enc = vector_length_encoding(this); 9450 BasicType bt = Matcher::vector_element_basic_type(this); 9451 int opc = this->ideal_Opcode(); 9452 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9453 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9454 %} 9455 ins_pipe( pipe_slow ); 9456 %} 9457 9458 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9459 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9460 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9461 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9462 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9463 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9464 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9465 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9466 ins_encode %{ 9467 int vlen_enc = vector_length_encoding(this); 9468 BasicType bt = Matcher::vector_element_basic_type(this); 9469 int opc = this->ideal_Opcode(); 9470 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9471 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9472 %} 9473 ins_pipe( pipe_slow ); 9474 %} 9475 9476 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9477 match(Set dst (MulVS (Binary dst src2) mask)); 9478 match(Set dst (MulVI (Binary dst src2) mask)); 9479 match(Set dst (MulVL (Binary dst src2) mask)); 9480 match(Set dst (MulVF (Binary dst src2) mask)); 9481 match(Set dst (MulVD (Binary dst src2) mask)); 9482 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9483 ins_encode %{ 9484 int vlen_enc = vector_length_encoding(this); 9485 BasicType bt = Matcher::vector_element_basic_type(this); 9486 int opc = this->ideal_Opcode(); 9487 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9488 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9489 %} 9490 ins_pipe( pipe_slow ); 9491 %} 9492 9493 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9494 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9495 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9496 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9497 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9498 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9499 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! 
mul masked operation" %} 9500 ins_encode %{ 9501 int vlen_enc = vector_length_encoding(this); 9502 BasicType bt = Matcher::vector_element_basic_type(this); 9503 int opc = this->ideal_Opcode(); 9504 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9505 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9506 %} 9507 ins_pipe( pipe_slow ); 9508 %} 9509 9510 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9511 match(Set dst (SqrtVF dst mask)); 9512 match(Set dst (SqrtVD dst mask)); 9513 ins_cost(100); 9514 format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %} 9515 ins_encode %{ 9516 int vlen_enc = vector_length_encoding(this); 9517 BasicType bt = Matcher::vector_element_basic_type(this); 9518 int opc = this->ideal_Opcode(); 9519 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9520 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9521 %} 9522 ins_pipe( pipe_slow ); 9523 %} 9524 9525 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9526 match(Set dst (DivVF (Binary dst src2) mask)); 9527 match(Set dst (DivVD (Binary dst src2) mask)); 9528 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9529 ins_encode %{ 9530 int vlen_enc = vector_length_encoding(this); 9531 BasicType bt = Matcher::vector_element_basic_type(this); 9532 int opc = this->ideal_Opcode(); 9533 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9534 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9535 %} 9536 ins_pipe( pipe_slow ); 9537 %} 9538 9539 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9540 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9541 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9542 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9543 ins_encode %{ 9544 int vlen_enc = vector_length_encoding(this); 9545 BasicType bt = Matcher::vector_element_basic_type(this); 9546 int opc = this->ideal_Opcode(); 9547 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9548 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9549 %} 9550 ins_pipe( pipe_slow ); 9551 %} 9552 9553 9554 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9555 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9556 match(Set dst (RotateRightV (Binary dst shift) mask)); 9557 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9558 ins_encode %{ 9559 int vlen_enc = vector_length_encoding(this); 9560 BasicType bt = Matcher::vector_element_basic_type(this); 9561 int opc = this->ideal_Opcode(); 9562 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9563 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9564 %} 9565 ins_pipe( pipe_slow ); 9566 %} 9567 9568 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9569 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9570 match(Set dst (RotateRightV (Binary dst src2) mask)); 9571 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! 
rotate masked operation" %} 9572 ins_encode %{ 9573 int vlen_enc = vector_length_encoding(this); 9574 BasicType bt = Matcher::vector_element_basic_type(this); 9575 int opc = this->ideal_Opcode(); 9576 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9577 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9578 %} 9579 ins_pipe( pipe_slow ); 9580 %} 9581 9582 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9583 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9584 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9585 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9586 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %} 9587 ins_encode %{ 9588 int vlen_enc = vector_length_encoding(this); 9589 BasicType bt = Matcher::vector_element_basic_type(this); 9590 int opc = this->ideal_Opcode(); 9591 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9592 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9593 %} 9594 ins_pipe( pipe_slow ); 9595 %} 9596 9597 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9598 predicate(!n->as_ShiftV()->is_var_shift()); 9599 match(Set dst (LShiftVS (Binary dst src2) mask)); 9600 match(Set dst (LShiftVI (Binary dst src2) mask)); 9601 match(Set dst (LShiftVL (Binary dst src2) mask)); 9602 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9603 ins_encode %{ 9604 int vlen_enc = vector_length_encoding(this); 9605 BasicType bt = Matcher::vector_element_basic_type(this); 9606 int opc = this->ideal_Opcode(); 9607 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9608 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9609 %} 9610 ins_pipe( pipe_slow ); 9611 %} 9612 9613 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9614 predicate(n->as_ShiftV()->is_var_shift()); 9615 match(Set dst (LShiftVS (Binary dst src2) mask)); 9616 match(Set dst (LShiftVI (Binary dst src2) mask)); 9617 match(Set dst (LShiftVL (Binary dst src2) mask)); 9618 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9619 ins_encode %{ 9620 int vlen_enc = vector_length_encoding(this); 9621 BasicType bt = Matcher::vector_element_basic_type(this); 9622 int opc = this->ideal_Opcode(); 9623 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9624 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9625 %} 9626 ins_pipe( pipe_slow ); 9627 %} 9628 9629 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9630 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9631 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9632 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9633 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! 
lshift masked operation" %} 9634 ins_encode %{ 9635 int vlen_enc = vector_length_encoding(this); 9636 BasicType bt = Matcher::vector_element_basic_type(this); 9637 int opc = this->ideal_Opcode(); 9638 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9639 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9640 %} 9641 ins_pipe( pipe_slow ); 9642 %} 9643 9644 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9645 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9646 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9647 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9648 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %} 9649 ins_encode %{ 9650 int vlen_enc = vector_length_encoding(this); 9651 BasicType bt = Matcher::vector_element_basic_type(this); 9652 int opc = this->ideal_Opcode(); 9653 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9654 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9655 %} 9656 ins_pipe( pipe_slow ); 9657 %} 9658 9659 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9660 predicate(!n->as_ShiftV()->is_var_shift()); 9661 match(Set dst (RShiftVS (Binary dst src2) mask)); 9662 match(Set dst (RShiftVI (Binary dst src2) mask)); 9663 match(Set dst (RShiftVL (Binary dst src2) mask)); 9664 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9665 ins_encode %{ 9666 int vlen_enc = vector_length_encoding(this); 9667 BasicType bt = Matcher::vector_element_basic_type(this); 9668 int opc = this->ideal_Opcode(); 9669 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9670 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9671 %} 9672 ins_pipe( pipe_slow ); 9673 %} 9674 9675 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9676 predicate(n->as_ShiftV()->is_var_shift()); 9677 match(Set dst (RShiftVS (Binary dst src2) mask)); 9678 match(Set dst (RShiftVI (Binary dst src2) mask)); 9679 match(Set dst (RShiftVL (Binary dst src2) mask)); 9680 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9681 ins_encode %{ 9682 int vlen_enc = vector_length_encoding(this); 9683 BasicType bt = Matcher::vector_element_basic_type(this); 9684 int opc = this->ideal_Opcode(); 9685 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9686 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9687 %} 9688 ins_pipe( pipe_slow ); 9689 %} 9690 9691 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9692 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9693 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9694 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9695 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! 
rshift masked operation" %} 9696 ins_encode %{ 9697 int vlen_enc = vector_length_encoding(this); 9698 BasicType bt = Matcher::vector_element_basic_type(this); 9699 int opc = this->ideal_Opcode(); 9700 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9701 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9702 %} 9703 ins_pipe( pipe_slow ); 9704 %} 9705 9706 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9707 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9708 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9709 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9710 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %} 9711 ins_encode %{ 9712 int vlen_enc = vector_length_encoding(this); 9713 BasicType bt = Matcher::vector_element_basic_type(this); 9714 int opc = this->ideal_Opcode(); 9715 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9716 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9722 predicate(!n->as_ShiftV()->is_var_shift()); 9723 match(Set dst (URShiftVS (Binary dst src2) mask)); 9724 match(Set dst (URShiftVI (Binary dst src2) mask)); 9725 match(Set dst (URShiftVL (Binary dst src2) mask)); 9726 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9727 ins_encode %{ 9728 int vlen_enc = vector_length_encoding(this); 9729 BasicType bt = Matcher::vector_element_basic_type(this); 9730 int opc = this->ideal_Opcode(); 9731 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9732 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9733 %} 9734 ins_pipe( pipe_slow ); 9735 %} 9736 9737 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9738 predicate(n->as_ShiftV()->is_var_shift()); 9739 match(Set dst (URShiftVS (Binary dst src2) mask)); 9740 match(Set dst (URShiftVI (Binary dst src2) mask)); 9741 match(Set dst (URShiftVL (Binary dst src2) mask)); 9742 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9743 ins_encode %{ 9744 int vlen_enc = vector_length_encoding(this); 9745 BasicType bt = Matcher::vector_element_basic_type(this); 9746 int opc = this->ideal_Opcode(); 9747 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9748 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9749 %} 9750 ins_pipe( pipe_slow ); 9751 %} 9752 9753 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9754 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9755 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9756 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9757 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9758 ins_encode %{ 9759 int vlen_enc = vector_length_encoding(this); 9760 BasicType bt = Matcher::vector_element_basic_type(this); 9761 int opc = this->ideal_Opcode(); 9762 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9763 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9764 %} 9765 ins_pipe( pipe_slow ); 9766 %} 9767 9768 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9769 match(Set dst (MaxV (Binary dst src2) mask)); 9770 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9771 ins_encode %{ 9772 int vlen_enc = vector_length_encoding(this); 9773 BasicType bt = Matcher::vector_element_basic_type(this); 9774 int opc = this->ideal_Opcode(); 9775 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9776 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9777 %} 9778 ins_pipe( pipe_slow ); 9779 %} 9780 9781 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9782 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9783 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9784 ins_encode %{ 9785 int vlen_enc = vector_length_encoding(this); 9786 BasicType bt = Matcher::vector_element_basic_type(this); 9787 int opc = this->ideal_Opcode(); 9788 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9789 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9790 %} 9791 ins_pipe( pipe_slow ); 9792 %} 9793 9794 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9795 match(Set dst (MinV (Binary dst src2) mask)); 9796 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9797 ins_encode %{ 9798 int vlen_enc = vector_length_encoding(this); 9799 BasicType bt = Matcher::vector_element_basic_type(this); 9800 int opc = this->ideal_Opcode(); 9801 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9802 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9803 %} 9804 ins_pipe( pipe_slow ); 9805 %} 9806 9807 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9808 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9809 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9810 ins_encode %{ 9811 int vlen_enc = vector_length_encoding(this); 9812 BasicType bt = Matcher::vector_element_basic_type(this); 9813 int opc = this->ideal_Opcode(); 9814 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9815 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9816 %} 9817 ins_pipe( pipe_slow ); 9818 %} 9819 9820 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9821 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9822 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9823 ins_encode %{ 9824 int vlen_enc = vector_length_encoding(this); 9825 BasicType bt = Matcher::vector_element_basic_type(this); 9826 int opc = this->ideal_Opcode(); 9827 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9828 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9829 %} 9830 ins_pipe( pipe_slow ); 9831 %} 9832 9833 instruct vabs_masked(vec dst, kReg mask) %{ 9834 match(Set dst (AbsVB dst mask)); 9835 match(Set dst (AbsVS dst mask)); 9836 match(Set dst (AbsVI dst mask)); 9837 match(Set dst (AbsVL dst mask)); 9838 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9839 ins_cost(100); 9840 ins_encode %{ 9841 int vlen_enc = vector_length_encoding(this); 9842 BasicType bt = Matcher::vector_element_basic_type(this); 9843 int opc = this->ideal_Opcode(); 9844 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9845 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9846 %} 9847 ins_pipe( pipe_slow ); 9848 %} 9849 9850 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9851 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9852 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9853 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9854 ins_encode %{ 9855 int vlen_enc = vector_length_encoding(this); 9856 BasicType bt = Matcher::vector_element_basic_type(this); 9857 int opc = this->ideal_Opcode(); 9858 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9859 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9860 %} 9861 ins_pipe( pipe_slow ); 9862 %} 9863 9864 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9865 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9866 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9867 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9868 ins_encode %{ 9869 int vlen_enc = vector_length_encoding(this); 9870 BasicType bt = Matcher::vector_element_basic_type(this); 9871 int opc = this->ideal_Opcode(); 9872 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9873 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9874 %} 9875 ins_pipe( pipe_slow ); 9876 %} 9877 9878 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{ 9879 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 9880 effect(TEMP scratch); 9881 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! using $scratch as TEMP" %} 9882 ins_encode %{ 9883 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 9884 int vlen_enc = vector_length_encoding(this, $src1); 9885 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 9886 9887 // Comparison i 9888 switch (src1_elem_bt) { 9889 case T_BYTE: { 9890 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9891 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9892 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9893 break; 9894 } 9895 case T_SHORT: { 9896 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9897 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9898 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9899 break; 9900 } 9901 case T_INT: { 9902 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9903 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9904 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9905 break; 9906 } 9907 case T_LONG: { 9908 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9909 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9910 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9911 break; 9912 } 9913 case T_FLOAT: { 9914 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9915 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9916 break; 9917 } 9918 case T_DOUBLE: { 9919 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9920 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9921 break; 9922 } 9923 default: assert(false, "%s", type2name(src1_elem_bt)); break; 9924 } 9925 %} 9926 ins_pipe( pipe_slow ); 9927 %} 9928 9929 instruct mask_all_evexI_LE32(kReg dst, rRegI src) 
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
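
// When the mask is represented by a true predicate type (EVEX opmask), the
// long-to-mask conversion is simply a kmov of the GPR bits into the kRegister.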
instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
    uint masklen = Matcher::vector_length(this);
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}