//
// Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, viewed as 16 32-bit words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
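//
// Illustrative reading of the "reg_def" format above, using the first entry
// defined below (annotation only, not an additional definition; note that the
// trailing argument supplies the concrete VMReg handle, in addition to the
// four fields listed in the format comment):
//
//   reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
//                   |    |      |     |         |
//                   |    |      |     |         +-- VMReg used by the rest of the VM
//                   |    |      |     +------------ encoding: bit-pattern placed into opcodes
//                   |    |      +------------------ ideal register type (spilled as a Float)
//                   |    +------------------------- C convention save type: save-on-call
//                   +------------------------------ register save type: save-on-call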
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3,
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
        xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1   (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2   (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3   (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4   (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5   (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6   (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7   (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k,
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
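// Note on the dynamic register classes declared above: a reg_class_dynamic
// resolves to one of its two underlying classes once the CPU features are
// known. Roughly (an illustrative sketch, not the ADLC-generated code):
//
//   // vectorz_reg acts as vectorz_reg_evex (XMM0-XMM31) when EVEX is
//   // available, and as vectorz_reg_legacy (XMM0-XMM15) otherwise:
//   bool use_evex_class = VM_Version::supports_evex();
//
// The _vl and _vlbwdq variants use stricter predicates (additionally requiring
// AVX512VL, or AVX512 VL+BW+DQ) before selecting the EVEX class.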

#include "runtime/vm_version.hpp"

class NativeJump;

class CallStubImpl {

  //--------------------------------------------------------------
  //---< Used for optimization in Compile::shorten_branches >---
  //--------------------------------------------------------------

 public:
  // Size of call trampoline stub.
  static uint size_call_trampoline() {
    return 0; // no call trampolines on this platform
  }

  // number of relocations needed by a call trampoline stub
  static uint reloc_call_trampoline() {
    return 0; // no call trampolines on this platform
  }
};

class HandlerImpl {

 public:

  static int emit_exception_handler(CodeBuffer &cbuf);
  static int emit_deopt_handler(CodeBuffer& cbuf);

  static uint size_exception_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
  }

#ifdef _LP64
  static uint size_deopt_handler() {
    // three 5 byte instructions plus one move for unreachable address.
    return 15+3;
  }
#else
  static uint size_deopt_handler() {
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
  }
#endif
};

inline Assembler::AvxVectorLen vector_length_encoding(int bytes) {
  switch(bytes) {
    case  4: // fall-through
    case  8: // fall-through
    case 16: return Assembler::AVX_128bit;
    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}

static inline bool is_unsigned_booltest_pred(int bt) {
  return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    _last_flag             = Flag_intel_jcc_erratum
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
}

int MachNode::pd_alignment_required() const {
  if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) {
    // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
    return IntelJccErratum::largest_jcc_size() + 1;
  } else {
    return 1;
  }
}

int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}

// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}

// Emit deopt handler code.
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_deopt_handler());
  if (base == NULL) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();

#ifdef _LP64
  address the_pc = (address) __ pc();
  Label next;
  // push a "the_pc" on the stack without destroying any registers
  // as they all may be live.

  // push address of "next"
  __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
  __ bind(next);
  // adjust it so it matches "the_pc"
  __ subptr(Address(rsp, 0), __ offset() - offset);
#else
  InternalAddress here(__ pc());
  __ pushptr(here.addr());
#endif

  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
  __ end_a_stub();
  return offset;
}

Assembler::Width widthForType(BasicType bt) {
  if (bt == T_BYTE) {
    return Assembler::B;
  } else if (bt == T_SHORT) {
    return Assembler::W;
  } else if (bt == T_INT) {
    return Assembler::D;
  } else {
    assert(bt == T_LONG, "not a long: %s", type2name(bt));
    return Assembler::Q;
  }
}

//=============================================================================

// Float masks come from different places depending on platform.
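// They are used to implement scalar and vector abs/neg by masking or flipping
// the IEEE sign bit. A minimal sketch of that use (illustrative only):
//
//   __ andps(dst, ExternalAddress(float_signmask()));   // abs: clear bit 31 (0x7fffffff mask)
//   __ xorps(dst, ExternalAddress(float_signflip()));   // neg: flip bit 31  (0x80000000 mask)
//
// The double variants (double_signmask()/double_signflip()) are used the same
// way with the packed-double forms andpd/xorpd.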
1361 #ifdef _LP64 1362 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1363 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1364 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1365 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1366 #else 1367 static address float_signmask() { return (address)float_signmask_pool; } 1368 static address float_signflip() { return (address)float_signflip_pool; } 1369 static address double_signmask() { return (address)double_signmask_pool; } 1370 static address double_signflip() { return (address)double_signflip_pool; } 1371 #endif 1372 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1373 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1374 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1375 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1376 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1377 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1378 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1379 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1380 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1381 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1382 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1383 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1384 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1385 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1386 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1387 1388 //============================================================================= 1389 const bool Matcher::match_rule_supported(int opcode) { 1390 if (!has_match_rule(opcode)) { 1391 return false; // no match rule present 1392 } 1393 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1394 switch (opcode) { 1395 case Op_AbsVL: 1396 case Op_StoreVectorScatter: 1397 if (UseAVX < 3) { 1398 return false; 1399 } 1400 break; 1401 case Op_PopCountI: 1402 case Op_PopCountL: 1403 if (!UsePopCountInstruction) { 1404 return false; 1405 } 1406 break; 1407 case Op_PopCountVI: 1408 if (!UsePopCountInstruction || (UseAVX < 2)) { 1409 return false; 1410 } 1411 break; 1412 case Op_PopCountVL: 1413 if (!UsePopCountInstruction || (UseAVX <= 2)) { 1414 return false; 1415 } 1416 break; 1417 case Op_MulVI: 1418 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1419 return false; 1420 } 1421 break; 1422 case Op_MulVL: 1423 if (UseSSE < 4) { // only with SSE4_1 or AVX 1424 return false; 1425 } 1426 break; 1427 case Op_MulReductionVL: 1428 if (VM_Version::supports_avx512dq() == false) { 1429 return false; 1430 } 1431 break; 1432 case Op_AddReductionVL: 1433 if (UseSSE < 2) { // requires at least SSE2 1434 return false; 1435 } 1436 break; 1437 case Op_AbsVB: 1438 case Op_AbsVS: 1439 case Op_AbsVI: 1440 case 
Op_AddReductionVI: 1441 case Op_AndReductionV: 1442 case Op_OrReductionV: 1443 case Op_XorReductionV: 1444 if (UseSSE < 3) { // requires at least SSSE3 1445 return false; 1446 } 1447 break; 1448 case Op_VectorLoadShuffle: 1449 case Op_VectorRearrange: 1450 case Op_MulReductionVI: 1451 if (UseSSE < 4) { // requires at least SSE4 1452 return false; 1453 } 1454 break; 1455 case Op_SqrtVD: 1456 case Op_SqrtVF: 1457 case Op_VectorMaskCmp: 1458 case Op_VectorCastB2X: 1459 case Op_VectorCastS2X: 1460 case Op_VectorCastI2X: 1461 case Op_VectorCastL2X: 1462 case Op_VectorCastF2X: 1463 case Op_VectorCastD2X: 1464 case Op_VectorUCastB2X: 1465 case Op_VectorUCastS2X: 1466 case Op_VectorUCastI2X: 1467 if (UseAVX < 1) { // enabled for AVX only 1468 return false; 1469 } 1470 break; 1471 case Op_CompareAndSwapL: 1472 #ifdef _LP64 1473 case Op_CompareAndSwapP: 1474 #endif 1475 if (!VM_Version::supports_cx8()) { 1476 return false; 1477 } 1478 break; 1479 case Op_CMoveVF: 1480 case Op_CMoveVD: 1481 if (UseAVX < 1) { // enabled for AVX only 1482 return false; 1483 } 1484 break; 1485 case Op_StrIndexOf: 1486 if (!UseSSE42Intrinsics) { 1487 return false; 1488 } 1489 break; 1490 case Op_StrIndexOfChar: 1491 if (!UseSSE42Intrinsics) { 1492 return false; 1493 } 1494 break; 1495 case Op_OnSpinWait: 1496 if (VM_Version::supports_on_spin_wait() == false) { 1497 return false; 1498 } 1499 break; 1500 case Op_MulVB: 1501 case Op_LShiftVB: 1502 case Op_RShiftVB: 1503 case Op_URShiftVB: 1504 case Op_VectorInsert: 1505 case Op_VectorLoadMask: 1506 case Op_VectorStoreMask: 1507 case Op_VectorBlend: 1508 if (UseSSE < 4) { 1509 return false; 1510 } 1511 break; 1512 #ifdef _LP64 1513 case Op_MaxD: 1514 case Op_MaxF: 1515 case Op_MinD: 1516 case Op_MinF: 1517 if (UseAVX < 1) { // enabled for AVX only 1518 return false; 1519 } 1520 break; 1521 #endif 1522 case Op_CacheWB: 1523 case Op_CacheWBPreSync: 1524 case Op_CacheWBPostSync: 1525 if (!VM_Version::supports_data_cache_line_flush()) { 1526 return false; 1527 } 1528 break; 1529 case Op_ExtractB: 1530 case Op_ExtractL: 1531 case Op_ExtractI: 1532 case Op_RoundDoubleMode: 1533 if (UseSSE < 4) { 1534 return false; 1535 } 1536 break; 1537 case Op_RoundDoubleModeV: 1538 if (VM_Version::supports_avx() == false) { 1539 return false; // 128bit vroundpd is not available 1540 } 1541 break; 1542 case Op_LoadVectorGather: 1543 if (UseAVX < 2) { 1544 return false; 1545 } 1546 break; 1547 case Op_FmaVD: 1548 case Op_FmaVF: 1549 if (!UseFMA) { 1550 return false; 1551 } 1552 break; 1553 case Op_MacroLogicV: 1554 if (UseAVX < 3 || !UseVectorMacroLogic) { 1555 return false; 1556 } 1557 break; 1558 1559 case Op_VectorCmpMasked: 1560 case Op_VectorMaskGen: 1561 case Op_LoadVectorMasked: 1562 case Op_StoreVectorMasked: 1563 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1564 return false; 1565 } 1566 break; 1567 case Op_VectorMaskFirstTrue: 1568 case Op_VectorMaskLastTrue: 1569 case Op_VectorMaskTrueCount: 1570 case Op_VectorMaskToLong: 1571 if (!is_LP64 || UseAVX < 1) { 1572 return false; 1573 } 1574 break; 1575 case Op_CopySignD: 1576 case Op_CopySignF: 1577 if (UseAVX < 3 || !is_LP64) { 1578 return false; 1579 } 1580 if (!VM_Version::supports_avx512vl()) { 1581 return false; 1582 } 1583 break; 1584 #ifndef _LP64 1585 case Op_AddReductionVF: 1586 case Op_AddReductionVD: 1587 case Op_MulReductionVF: 1588 case Op_MulReductionVD: 1589 if (UseSSE < 1) { // requires at least SSE 1590 return false; 1591 } 1592 break; 1593 case Op_MulAddVS2VI: 1594 case Op_RShiftVL: 1595 case Op_AbsVD: 
1596 case Op_NegVD: 1597 if (UseSSE < 2) { 1598 return false; 1599 } 1600 break; 1601 #endif // !LP64 1602 case Op_SignumF: 1603 if (UseSSE < 1) { 1604 return false; 1605 } 1606 break; 1607 case Op_SignumD: 1608 if (UseSSE < 2) { 1609 return false; 1610 } 1611 break; 1612 case Op_SqrtF: 1613 if (UseSSE < 1) { 1614 return false; 1615 } 1616 break; 1617 case Op_SqrtD: 1618 #ifdef _LP64 1619 if (UseSSE < 2) { 1620 return false; 1621 } 1622 #else 1623 // x86_32.ad has a special match rule for SqrtD. 1624 // Together with common x86 rules, this handles all UseSSE cases. 1625 #endif 1626 break; 1627 } 1628 return true; // Match rules are supported by default. 1629 } 1630 1631 //------------------------------------------------------------------------ 1632 1633 // Identify extra cases that we might want to provide match rules for vector nodes and 1634 // other intrinsics guarded with vector length (vlen) and element type (bt). 1635 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1636 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1637 if (!match_rule_supported(opcode)) { 1638 return false; 1639 } 1640 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1641 // * SSE2 supports 128bit vectors for all types; 1642 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1643 // * AVX2 supports 256bit vectors for all types; 1644 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1645 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1646 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1647 // And MaxVectorSize is taken into account as well. 1648 if (!vector_size_supported(bt, vlen)) { 1649 return false; 1650 } 1651 // Special cases which require vector length follow: 1652 // * implementation limitations 1653 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1654 // * 128bit vroundpd instruction is present only in AVX1 1655 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1656 switch (opcode) { 1657 case Op_AbsVF: 1658 case Op_NegVF: 1659 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1660 return false; // 512bit vandps and vxorps are not available 1661 } 1662 break; 1663 case Op_AbsVD: 1664 case Op_NegVD: 1665 case Op_MulVL: 1666 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1667 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1668 } 1669 break; 1670 case Op_CMoveVF: 1671 if (vlen != 8) { 1672 return false; // implementation limitation (only vcmov8F_reg is present) 1673 } 1674 break; 1675 case Op_RotateRightV: 1676 case Op_RotateLeftV: 1677 if (bt != T_INT && bt != T_LONG) { 1678 return false; 1679 } // fallthrough 1680 case Op_MacroLogicV: 1681 if (!VM_Version::supports_evex() || 1682 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1683 return false; 1684 } 1685 break; 1686 case Op_ClearArray: 1687 case Op_VectorMaskGen: 1688 case Op_VectorCmpMasked: 1689 case Op_LoadVectorMasked: 1690 case Op_StoreVectorMasked: 1691 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1692 return false; 1693 } 1694 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1695 return false; 1696 } 1697 break; 1698 case Op_CMoveVD: 1699 if (vlen != 4) { 1700 return false; // implementation limitation (only vcmov4D_reg is present) 1701 } 1702 break; 1703 case Op_MaxV: 1704 case Op_MinV: 1705 if 
(UseSSE < 4 && is_integral_type(bt)) { 1706 return false; 1707 } 1708 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1709 // Float/Double intrinsics are enabled for AVX family currently. 1710 if (UseAVX == 0) { 1711 return false; 1712 } 1713 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1714 return false; 1715 } 1716 } 1717 break; 1718 case Op_CallLeafVector: 1719 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1720 return false; 1721 } 1722 break; 1723 case Op_AddReductionVI: 1724 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1725 return false; 1726 } 1727 // fallthrough 1728 case Op_AndReductionV: 1729 case Op_OrReductionV: 1730 case Op_XorReductionV: 1731 if (is_subword_type(bt) && (UseSSE < 4)) { 1732 return false; 1733 } 1734 #ifndef _LP64 1735 if (bt == T_BYTE || bt == T_LONG) { 1736 return false; 1737 } 1738 #endif 1739 break; 1740 #ifndef _LP64 1741 case Op_VectorInsert: 1742 if (bt == T_LONG || bt == T_DOUBLE) { 1743 return false; 1744 } 1745 break; 1746 #endif 1747 case Op_MinReductionV: 1748 case Op_MaxReductionV: 1749 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1750 return false; 1751 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1752 return false; 1753 } 1754 // Float/Double intrinsics enabled for AVX family. 1755 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1756 return false; 1757 } 1758 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1759 return false; 1760 } 1761 #ifndef _LP64 1762 if (bt == T_BYTE || bt == T_LONG) { 1763 return false; 1764 } 1765 #endif 1766 break; 1767 case Op_VectorTest: 1768 if (UseSSE < 4) { 1769 return false; // Implementation limitation 1770 } else if (size_in_bits < 32) { 1771 return false; // Implementation limitation 1772 } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { 1773 return false; // Implementation limitation 1774 } 1775 break; 1776 case Op_VectorLoadShuffle: 1777 case Op_VectorRearrange: 1778 if(vlen == 2) { 1779 return false; // Implementation limitation due to how shuffle is loaded 1780 } else if (size_in_bits == 256 && UseAVX < 2) { 1781 return false; // Implementation limitation 1782 } else if (bt == T_BYTE && size_in_bits > 256 && !VM_Version::supports_avx512_vbmi()) { 1783 return false; // Implementation limitation 1784 } else if (bt == T_SHORT && size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1785 return false; // Implementation limitation 1786 } 1787 break; 1788 case Op_VectorLoadMask: 1789 if (size_in_bits == 256 && UseAVX < 2) { 1790 return false; // Implementation limitation 1791 } 1792 // fallthrough 1793 case Op_VectorStoreMask: 1794 if (vlen == 2) { 1795 return false; // Implementation limitation 1796 } 1797 break; 1798 case Op_VectorCastB2X: 1799 case Op_VectorCastS2X: 1800 case Op_VectorCastI2X: 1801 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1802 return false; 1803 } 1804 break; 1805 case Op_VectorCastL2X: 1806 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1807 return false; 1808 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1809 return false; 1810 } 1811 break; 1812 case Op_VectorCastD2X: 1813 if (is_subword_type(bt) || bt == T_INT) { 1814 return false; 1815 } 1816 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1817 return false; 1818 } 1819 break; 1820 case Op_VectorCastF2X: 1821 if (is_subword_type(bt) 
|| bt == T_LONG) { 1822 return false; 1823 } 1824 break; 1825 case Op_MulReductionVI: 1826 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1827 return false; 1828 } 1829 break; 1830 case Op_LoadVectorGatherMasked: 1831 case Op_StoreVectorScatterMasked: 1832 case Op_StoreVectorScatter: 1833 if(is_subword_type(bt)) { 1834 return false; 1835 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1836 return false; 1837 } 1838 // fallthrough 1839 case Op_LoadVectorGather: 1840 if (size_in_bits == 64 ) { 1841 return false; 1842 } 1843 break; 1844 case Op_MaskAll: 1845 if (!VM_Version::supports_evex()) { 1846 return false; 1847 } 1848 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1849 return false; 1850 } 1851 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1852 return false; 1853 } 1854 break; 1855 case Op_VectorMaskCmp: 1856 if (vlen < 2 || size_in_bits < 32) { 1857 return false; 1858 } 1859 break; 1860 case Op_VectorLongToMask: 1861 if (UseAVX < 1 || !is_LP64) { 1862 return false; 1863 } 1864 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1865 return false; 1866 } 1867 break; 1868 case Op_PopCountVI: 1869 if (!VM_Version::supports_avx512_vpopcntdq() && 1870 (vlen == 16) && !VM_Version::supports_avx512bw()) { 1871 return false; 1872 } 1873 break; 1874 case Op_PopCountVL: 1875 if (!VM_Version::supports_avx512_vpopcntdq() && 1876 ((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) { 1877 return false; 1878 } 1879 break; 1880 } 1881 return true; // Per default match rules are supported. 1882 } 1883 1884 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1885 // ADLC based match_rule_supported routine checks for the existence of pattern based 1886 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1887 // of their non-masked counterpart with mask edge being the differentiator. 1888 // This routine does a strict check on the existence of masked operation patterns 1889 // by returning a default false value for all the other opcodes apart from the 1890 // ones whose masked instruction patterns are defined in this file. 
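  // For example (an illustrative sketch): a masked and an unmasked byte-vector
  // add use the same ideal opcode and differ only in the extra mask input,
  //
  //   (AddVB src1 src2)        // unmasked form
  //   (AddVB src1 src2 mask)   // masked form, same opcode, extra mask edge
  //
  // so on top of match_rule_supported_vector() this routine also has to verify
  // that a masked pattern exists for the opcode and that the required EVEX
  // features are available (e.g. AVX512BW for subword types and AVX512VL for
  // vectors shorter than 512 bits, as checked below).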
1891 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1892 return false; 1893 } 1894 1895 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1896 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1897 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1898 return false; 1899 } 1900 switch(opcode) { 1901 // Unary masked operations 1902 case Op_AbsVB: 1903 case Op_AbsVS: 1904 if(!VM_Version::supports_avx512bw()) { 1905 return false; // Implementation limitation 1906 } 1907 case Op_AbsVI: 1908 case Op_AbsVL: 1909 return true; 1910 1911 // Ternary masked operations 1912 case Op_FmaVF: 1913 case Op_FmaVD: 1914 return true; 1915 1916 case Op_MacroLogicV: 1917 if(bt != T_INT && bt != T_LONG) { 1918 return false; 1919 } 1920 return true; 1921 1922 // Binary masked operations 1923 case Op_AddVB: 1924 case Op_AddVS: 1925 case Op_SubVB: 1926 case Op_SubVS: 1927 case Op_MulVS: 1928 case Op_LShiftVS: 1929 case Op_RShiftVS: 1930 case Op_URShiftVS: 1931 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1932 if (!VM_Version::supports_avx512bw()) { 1933 return false; // Implementation limitation 1934 } 1935 return true; 1936 1937 case Op_MulVL: 1938 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1939 if (!VM_Version::supports_avx512dq()) { 1940 return false; // Implementation limitation 1941 } 1942 return true; 1943 1944 case Op_AndV: 1945 case Op_OrV: 1946 case Op_XorV: 1947 case Op_RotateRightV: 1948 case Op_RotateLeftV: 1949 if (bt != T_INT && bt != T_LONG) { 1950 return false; // Implementation limitation 1951 } 1952 return true; 1953 1954 case Op_VectorLoadMask: 1955 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1956 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1957 return false; 1958 } 1959 return true; 1960 1961 case Op_AddVI: 1962 case Op_AddVL: 1963 case Op_AddVF: 1964 case Op_AddVD: 1965 case Op_SubVI: 1966 case Op_SubVL: 1967 case Op_SubVF: 1968 case Op_SubVD: 1969 case Op_MulVI: 1970 case Op_MulVF: 1971 case Op_MulVD: 1972 case Op_DivVF: 1973 case Op_DivVD: 1974 case Op_SqrtVF: 1975 case Op_SqrtVD: 1976 case Op_LShiftVI: 1977 case Op_LShiftVL: 1978 case Op_RShiftVI: 1979 case Op_RShiftVL: 1980 case Op_URShiftVI: 1981 case Op_URShiftVL: 1982 case Op_LoadVectorMasked: 1983 case Op_StoreVectorMasked: 1984 case Op_LoadVectorGatherMasked: 1985 case Op_StoreVectorScatterMasked: 1986 return true; 1987 1988 case Op_MaxV: 1989 case Op_MinV: 1990 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1991 return false; // Implementation limitation 1992 } 1993 if (is_floating_point_type(bt)) { 1994 return false; // Implementation limitation 1995 } 1996 return true; 1997 1998 case Op_VectorMaskCmp: 1999 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2000 return false; // Implementation limitation 2001 } 2002 return true; 2003 2004 case Op_VectorRearrange: 2005 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2006 return false; // Implementation limitation 2007 } 2008 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2009 return false; // Implementation limitation 2010 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2011 return false; // Implementation limitation 2012 } 2013 return true; 2014 2015 // Binary Logical operations 2016 case Op_AndVMask: 2017 case Op_OrVMask: 2018 case Op_XorVMask: 2019 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2020 return false; // Implementation limitation 2021 } 2022 return true; 2023 2024 case 
Op_MaskAll: 2025 return true; 2026 2027 default: 2028 return false; 2029 } 2030 } 2031 2032 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2033 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2034 bool legacy = (generic_opnd->opcode() == LEGVEC); 2035 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2036 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2037 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2038 return new legVecZOper(); 2039 } 2040 if (legacy) { 2041 switch (ideal_reg) { 2042 case Op_VecS: return new legVecSOper(); 2043 case Op_VecD: return new legVecDOper(); 2044 case Op_VecX: return new legVecXOper(); 2045 case Op_VecY: return new legVecYOper(); 2046 case Op_VecZ: return new legVecZOper(); 2047 } 2048 } else { 2049 switch (ideal_reg) { 2050 case Op_VecS: return new vecSOper(); 2051 case Op_VecD: return new vecDOper(); 2052 case Op_VecX: return new vecXOper(); 2053 case Op_VecY: return new vecYOper(); 2054 case Op_VecZ: return new vecZOper(); 2055 } 2056 } 2057 ShouldNotReachHere(); 2058 return NULL; 2059 } 2060 2061 bool Matcher::is_reg2reg_move(MachNode* m) { 2062 switch (m->rule()) { 2063 case MoveVec2Leg_rule: 2064 case MoveLeg2Vec_rule: 2065 case MoveF2VL_rule: 2066 case MoveF2LEG_rule: 2067 case MoveVL2F_rule: 2068 case MoveLEG2F_rule: 2069 case MoveD2VL_rule: 2070 case MoveD2LEG_rule: 2071 case MoveVL2D_rule: 2072 case MoveLEG2D_rule: 2073 return true; 2074 default: 2075 return false; 2076 } 2077 } 2078 2079 bool Matcher::is_generic_vector(MachOper* opnd) { 2080 switch (opnd->opcode()) { 2081 case VEC: 2082 case LEGVEC: 2083 return true; 2084 default: 2085 return false; 2086 } 2087 } 2088 2089 //------------------------------------------------------------------------ 2090 2091 const RegMask* Matcher::predicate_reg_mask(void) { 2092 return &_VECTMASK_REG_mask; 2093 } 2094 2095 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2096 return new TypeVectMask(elemTy, length); 2097 } 2098 2099 // Max vector size in bytes. 0 if not supported. 2100 const int Matcher::vector_width_in_bytes(BasicType bt) { 2101 assert(is_java_primitive(bt), "only primitive type vectors"); 2102 if (UseSSE < 2) return 0; 2103 // SSE2 supports 128bit vectors for all types. 2104 // AVX2 supports 256bit vectors for all types. 2105 // AVX2/EVEX supports 512bit vectors for all types. 2106 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2107 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2108 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2109 size = (UseAVX > 2) ? 64 : 32; 2110 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2111 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2112 // Use flag to limit vector size. 2113 size = MIN2(size,(int)MaxVectorSize); 2114 // Minimum 2 values in vector (or 4 for bytes). 2115 switch (bt) { 2116 case T_DOUBLE: 2117 case T_LONG: 2118 if (size < 16) return 0; 2119 break; 2120 case T_FLOAT: 2121 case T_INT: 2122 if (size < 8) return 0; 2123 break; 2124 case T_BOOLEAN: 2125 if (size < 4) return 0; 2126 break; 2127 case T_CHAR: 2128 if (size < 4) return 0; 2129 break; 2130 case T_BYTE: 2131 if (size < 4) return 0; 2132 break; 2133 case T_SHORT: 2134 if (size < 4) return 0; 2135 break; 2136 default: 2137 ShouldNotReachHere(); 2138 } 2139 return size; 2140 } 2141 2142 // Limits on vector size (number of elements) loaded into vector. 
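// A worked example of the sizing functions here (assuming MaxVectorSize does
// not further restrict the width):
//   UseAVX == 2               : vector_width_in_bytes(T_INT)  == 32  ->  max_vector_size(T_INT)  == 8
//   UseAVX == 3 with AVX512BW : vector_width_in_bytes(T_BYTE) == 64  ->  max_vector_size(T_BYTE) == 64
//   min_vector_size(T_BYTE)   == 4  (a vector must span at least 4 bytes)
//   min_vector_size(T_DOUBLE) == 1  (to support SVML calls on double64 vectors)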
2143 const int Matcher::max_vector_size(const BasicType bt) { 2144 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2145 } 2146 const int Matcher::min_vector_size(const BasicType bt) { 2147 int max_size = max_vector_size(bt); 2148 // Min size which can be loaded into vector is 4 bytes. 2149 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2150 // Support for calling svml double64 vectors 2151 if (bt == T_DOUBLE) { 2152 size = 1; 2153 } 2154 return MIN2(size,max_size); 2155 } 2156 2157 const int Matcher::scalable_vector_reg_size(const BasicType bt) { 2158 return -1; 2159 } 2160 2161 // Vector ideal reg corresponding to specified size in bytes 2162 const uint Matcher::vector_ideal_reg(int size) { 2163 assert(MaxVectorSize >= size, ""); 2164 switch(size) { 2165 case 4: return Op_VecS; 2166 case 8: return Op_VecD; 2167 case 16: return Op_VecX; 2168 case 32: return Op_VecY; 2169 case 64: return Op_VecZ; 2170 } 2171 ShouldNotReachHere(); 2172 return 0; 2173 } 2174 2175 // Check for shift by small constant as well 2176 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2177 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2178 shift->in(2)->get_int() <= 3 && 2179 // Are there other uses besides address expressions? 2180 !matcher->is_visited(shift)) { 2181 address_visited.set(shift->_idx); // Flag as address_visited 2182 mstack.push(shift->in(2), Matcher::Visit); 2183 Node *conv = shift->in(1); 2184 #ifdef _LP64 2185 // Allow Matcher to match the rule which bypass 2186 // ConvI2L operation for an array index on LP64 2187 // if the index value is positive. 2188 if (conv->Opcode() == Op_ConvI2L && 2189 conv->as_Type()->type()->is_long()->_lo >= 0 && 2190 // Are there other uses besides address expressions? 2191 !matcher->is_visited(conv)) { 2192 address_visited.set(conv->_idx); // Flag as address_visited 2193 mstack.push(conv->in(1), Matcher::Pre_Visit); 2194 } else 2195 #endif 2196 mstack.push(conv, Matcher::Pre_Visit); 2197 return true; 2198 } 2199 return false; 2200 } 2201 2202 // This function identifies sub-graphs in which a 'load' node is 2203 // input to two different nodes, and such that it can be matched 2204 // with BMI instructions like blsi, blsr, etc. 2205 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2206 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2207 // refers to the same node. 2208 // 2209 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2210 // This is a temporary solution until we make DAGs expressible in ADL. 
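// Illustrative examples of the shapes recognized here (Java-level expression,
// ideal sub-graph, resulting BMI1 instruction):
//   x & -x       (AndI (SubI 0 x) x)    ->  blsi   (isolate lowest set bit)
//   x & (x - 1)  (AndI (AddI x -1) x)   ->  blsr   (reset lowest set bit)
//   x ^ (x - 1)  (XorI (AddI x -1) x)   ->  blsmsk (mask up to lowest set bit)
// where x is the shared LoadI/LoadL node that is_bmi_pattern() checks for.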
2211 template<typename ConType> 2212 class FusedPatternMatcher { 2213 Node* _op1_node; 2214 Node* _mop_node; 2215 int _con_op; 2216 2217 static int match_next(Node* n, int next_op, int next_op_idx) { 2218 if (n->in(1) == NULL || n->in(2) == NULL) { 2219 return -1; 2220 } 2221 2222 if (next_op_idx == -1) { // n is commutative, try rotations 2223 if (n->in(1)->Opcode() == next_op) { 2224 return 1; 2225 } else if (n->in(2)->Opcode() == next_op) { 2226 return 2; 2227 } 2228 } else { 2229 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2230 if (n->in(next_op_idx)->Opcode() == next_op) { 2231 return next_op_idx; 2232 } 2233 } 2234 return -1; 2235 } 2236 2237 public: 2238 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2239 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2240 2241 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2242 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2243 typename ConType::NativeType con_value) { 2244 if (_op1_node->Opcode() != op1) { 2245 return false; 2246 } 2247 if (_mop_node->outcnt() > 2) { 2248 return false; 2249 } 2250 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2251 if (op1_op2_idx == -1) { 2252 return false; 2253 } 2254 // Memory operation must be the other edge 2255 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2256 2257 // Check that the mop node is really what we want 2258 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2259 Node* op2_node = _op1_node->in(op1_op2_idx); 2260 if (op2_node->outcnt() > 1) { 2261 return false; 2262 } 2263 assert(op2_node->Opcode() == op2, "Should be"); 2264 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2265 if (op2_con_idx == -1) { 2266 return false; 2267 } 2268 // Memory operation must be the other edge 2269 int op2_mop_idx = (op2_con_idx & 1) + 1; 2270 // Check that the memory operation is the same node 2271 if (op2_node->in(op2_mop_idx) == _mop_node) { 2272 // Now check the constant 2273 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2274 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2275 return true; 2276 } 2277 } 2278 } 2279 return false; 2280 } 2281 }; 2282 2283 static bool is_bmi_pattern(Node* n, Node* m) { 2284 assert(UseBMI1Instructions, "sanity"); 2285 if (n != NULL && m != NULL) { 2286 if (m->Opcode() == Op_LoadI) { 2287 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2288 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2289 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2290 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2291 } else if (m->Opcode() == Op_LoadL) { 2292 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2293 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2294 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2295 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2296 } 2297 } 2298 return false; 2299 } 2300 2301 // Should the matcher clone input 'm' of node 'n'? 2302 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2303 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
2304 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2305 mstack.push(m, Visit); 2306 return true; 2307 } 2308 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2309 mstack.push(m, Visit); // m = ShiftCntV 2310 return true; 2311 } 2312 return false; 2313 } 2314 2315 // Should the Matcher clone shifts on addressing modes, expecting them 2316 // to be subsumed into complex addressing expressions or compute them 2317 // into registers? 2318 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2319 Node *off = m->in(AddPNode::Offset); 2320 if (off->is_Con()) { 2321 address_visited.test_set(m->_idx); // Flag as address_visited 2322 Node *adr = m->in(AddPNode::Address); 2323 2324 // Intel can handle 2 adds in addressing mode 2325 // AtomicAdd is not an addressing expression. 2326 // Cheap to find it by looking for screwy base. 2327 if (adr->is_AddP() && 2328 !adr->in(AddPNode::Base)->is_top() && 2329 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2330 // Are there other uses besides address expressions? 2331 !is_visited(adr)) { 2332 address_visited.set(adr->_idx); // Flag as address_visited 2333 Node *shift = adr->in(AddPNode::Offset); 2334 if (!clone_shift(shift, this, mstack, address_visited)) { 2335 mstack.push(shift, Pre_Visit); 2336 } 2337 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2338 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2339 } else { 2340 mstack.push(adr, Pre_Visit); 2341 } 2342 2343 // Clone X+offset as it also folds into most addressing expressions 2344 mstack.push(off, Visit); 2345 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2346 return true; 2347 } else if (clone_shift(off, this, mstack, address_visited)) { 2348 address_visited.test_set(m->_idx); // Flag as address_visited 2349 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2350 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2351 return true; 2352 } 2353 return false; 2354 } 2355 2356 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2357 switch (bt) { 2358 case BoolTest::eq: 2359 return Assembler::eq; 2360 case BoolTest::ne: 2361 return Assembler::neq; 2362 case BoolTest::le: 2363 case BoolTest::ule: 2364 return Assembler::le; 2365 case BoolTest::ge: 2366 case BoolTest::uge: 2367 return Assembler::nlt; 2368 case BoolTest::lt: 2369 case BoolTest::ult: 2370 return Assembler::lt; 2371 case BoolTest::gt: 2372 case BoolTest::ugt: 2373 return Assembler::nle; 2374 default : ShouldNotReachHere(); return Assembler::_false; 2375 } 2376 } 2377 2378 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2379 switch (bt) { 2380 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2381 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2382 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2383 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2384 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2385 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2386 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2387 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2388 } 2389 } 2390 2391 // Helper methods for MachSpillCopyNode::implementation(). 
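// A note on the encodings chosen below: with AVX-512 but without AVX512VL
// (e.g. Knights Landing), the VEX-encoded movdqu/vmovdqu cannot address
// XMM16-XMM31, so 128-bit and 256-bit moves fall back to the EVEX-encoded
// vextractf32x4/vextractf64x4 (vinsertf32x4/vinsertf64x4 for reloads) with
// immediate 0x0, which copy the same low lanes but can reach the upper bank.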
2392 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2393 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2394 assert(ireg == Op_VecS || // 32bit vector 2395 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2396 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2397 "no non-adjacent vector moves" ); 2398 if (cbuf) { 2399 C2_MacroAssembler _masm(cbuf); 2400 switch (ireg) { 2401 case Op_VecS: // copy whole register 2402 case Op_VecD: 2403 case Op_VecX: 2404 #ifndef _LP64 2405 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2406 #else 2407 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2408 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2409 } else { 2410 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2411 } 2412 #endif 2413 break; 2414 case Op_VecY: 2415 #ifndef _LP64 2416 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2417 #else 2418 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2419 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2420 } else { 2421 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2422 } 2423 #endif 2424 break; 2425 case Op_VecZ: 2426 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2427 break; 2428 default: 2429 ShouldNotReachHere(); 2430 } 2431 #ifndef PRODUCT 2432 } else { 2433 switch (ireg) { 2434 case Op_VecS: 2435 case Op_VecD: 2436 case Op_VecX: 2437 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2438 break; 2439 case Op_VecY: 2440 case Op_VecZ: 2441 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2442 break; 2443 default: 2444 ShouldNotReachHere(); 2445 } 2446 #endif 2447 } 2448 } 2449 2450 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2451 int stack_offset, int reg, uint ireg, outputStream* st) { 2452 if (cbuf) { 2453 C2_MacroAssembler _masm(cbuf); 2454 if (is_load) { 2455 switch (ireg) { 2456 case Op_VecS: 2457 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2458 break; 2459 case Op_VecD: 2460 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2461 break; 2462 case Op_VecX: 2463 #ifndef _LP64 2464 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2465 #else 2466 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2467 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2468 } else { 2469 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2470 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2471 } 2472 #endif 2473 break; 2474 case Op_VecY: 2475 #ifndef _LP64 2476 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2477 #else 2478 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2479 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2480 } else { 2481 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2482 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2483 } 2484 #endif 2485 break; 2486 case Op_VecZ: 2487 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2488 break; 2489 default: 2490 ShouldNotReachHere(); 2491 } 2492 } else { // store 2493 switch (ireg) { 2494 case Op_VecS: 2495 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2496 break; 2497 case Op_VecD: 2498 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2499 break; 2500 case Op_VecX: 2501 #ifndef _LP64 2502 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2503 #else 2504 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2505 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2506 } 2507 else { 2508 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2509 } 2510 #endif 2511 break; 2512 case Op_VecY: 2513 #ifndef _LP64 2514 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2515 #else 2516 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2517 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2518 } 2519 else { 2520 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2521 } 2522 #endif 2523 break; 2524 case Op_VecZ: 2525 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2526 break; 2527 default: 2528 ShouldNotReachHere(); 2529 } 2530 } 2531 #ifndef PRODUCT 2532 } else { 2533 if (is_load) { 2534 switch (ireg) { 2535 case Op_VecS: 2536 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2537 break; 2538 case Op_VecD: 2539 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2540 break; 2541 case Op_VecX: 2542 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2543 break; 2544 case Op_VecY: 2545 case Op_VecZ: 2546 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2547 break; 2548 default: 2549 ShouldNotReachHere(); 2550 } 2551 } else { // store 2552 switch (ireg) { 2553 case Op_VecS: 2554 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2555 break; 2556 case Op_VecD: 2557 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2558 break; 2559 case Op_VecX: 2560 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2561 break; 2562 case Op_VecY: 2563 case Op_VecZ: 2564 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2565 break; 2566 default: 2567 ShouldNotReachHere(); 2568 } 2569 } 2570 #endif 2571 } 2572 } 2573 2574 template <class T> 2575 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2576 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2577 jvalue ele; 2578 switch (bt) { 2579 case T_BYTE: ele.b = con; break; 2580 case T_SHORT: ele.s = con; break; 2581 case T_INT: ele.i = con; break; 2582 case T_LONG: ele.j = con; break; 2583 case T_FLOAT: ele.f = con; break; 2584 case T_DOUBLE: ele.d = con; break; 2585 default: ShouldNotReachHere(); 2586 } 2587 for (int i = 0; i < len; i++) { 2588 val->append(ele); 2589 } 2590 return val; 2591 } 2592 2593 static inline jlong high_bit_set(BasicType bt) { 2594 switch (bt) { 2595 case T_BYTE: 
return 0x8080808080808080; 2596 case T_SHORT: return 0x8000800080008000; 2597 case T_INT: return 0x8000000080000000; 2598 case T_LONG: return 0x8000000000000000; 2599 default: 2600 ShouldNotReachHere(); 2601 return 0; 2602 } 2603 } 2604 2605 #ifndef PRODUCT 2606 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2607 st->print("nop \t# %d bytes pad for loops and calls", _count); 2608 } 2609 #endif 2610 2611 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2612 C2_MacroAssembler _masm(&cbuf); 2613 __ nop(_count); 2614 } 2615 2616 uint MachNopNode::size(PhaseRegAlloc*) const { 2617 return _count; 2618 } 2619 2620 #ifndef PRODUCT 2621 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2622 st->print("# breakpoint"); 2623 } 2624 #endif 2625 2626 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2627 C2_MacroAssembler _masm(&cbuf); 2628 __ int3(); 2629 } 2630 2631 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2632 return MachNode::size(ra_); 2633 } 2634 2635 %} 2636 2637 encode %{ 2638 2639 enc_class call_epilog %{ 2640 if (VerifyStackAtCalls) { 2641 // Check that stack depth is unchanged: find majik cookie on stack 2642 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2643 C2_MacroAssembler _masm(&cbuf); 2644 Label L; 2645 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2646 __ jccb(Assembler::equal, L); 2647 // Die if stack mismatch 2648 __ int3(); 2649 __ bind(L); 2650 } 2651 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2652 C2_MacroAssembler _masm(&cbuf); 2653 if (!_method->signature()->returns_null_free_inline_type()) { 2654 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2655 // Search for the corresponding projection, get the register and emit code that initialized it. 2656 uint con = (tf()->range_cc()->cnt() - 1); 2657 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2658 ProjNode* proj = fast_out(i)->as_Proj(); 2659 if (proj->_con == con) { 2660 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2661 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2662 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2663 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2664 __ testq(rax, rax); 2665 __ set_byte_if_not_zero(toReg); 2666 __ movzbl(toReg, toReg); 2667 if (reg->is_stack()) { 2668 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2669 __ movq(Address(rsp, st_off), toReg); 2670 } 2671 break; 2672 } 2673 } 2674 } 2675 if (return_value_is_used()) { 2676 // An inline type is returned as fields in multiple registers. 2677 // Rax either contains an oop if the inline type is buffered or a pointer 2678 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2679 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2680 // rax &= (rax & 1) - 1 2681 __ movptr(rscratch1, rax); 2682 __ andptr(rscratch1, 0x1); 2683 __ subptr(rscratch1, 0x1); 2684 __ andptr(rax, rscratch1); 2685 } 2686 } 2687 %} 2688 2689 %} 2690 2691 // Operands for bound floating pointer register arguments 2692 operand rxmm0() %{ 2693 constraint(ALLOC_IN_RC(xmm0_reg)); 2694 match(VecX); 2695 format%{%} 2696 interface(REG_INTER); 2697 %} 2698 2699 //----------OPERANDS----------------------------------------------------------- 2700 // Operand definitions must precede instruction definitions for correct parsing 2701 // in the ADLC because operands constitute user defined types which are used in 2702 // instruction definitions. 2703 2704 // Vectors 2705 2706 // Dummy generic vector class. Should be used for all vector operands. 2707 // Replaced with vec[SDXYZ] during post-selection pass. 2708 operand vec() %{ 2709 constraint(ALLOC_IN_RC(dynamic)); 2710 match(VecX); 2711 match(VecY); 2712 match(VecZ); 2713 match(VecS); 2714 match(VecD); 2715 2716 format %{ %} 2717 interface(REG_INTER); 2718 %} 2719 2720 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2721 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2722 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2723 // runtime code generation via reg_class_dynamic. 2724 operand legVec() %{ 2725 constraint(ALLOC_IN_RC(dynamic)); 2726 match(VecX); 2727 match(VecY); 2728 match(VecZ); 2729 match(VecS); 2730 match(VecD); 2731 2732 format %{ %} 2733 interface(REG_INTER); 2734 %} 2735 2736 // Replaces vec during post-selection cleanup. See above. 2737 operand vecS() %{ 2738 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2739 match(VecS); 2740 2741 format %{ %} 2742 interface(REG_INTER); 2743 %} 2744 2745 // Replaces legVec during post-selection cleanup. See above. 2746 operand legVecS() %{ 2747 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2748 match(VecS); 2749 2750 format %{ %} 2751 interface(REG_INTER); 2752 %} 2753 2754 // Replaces vec during post-selection cleanup. See above. 2755 operand vecD() %{ 2756 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2757 match(VecD); 2758 2759 format %{ %} 2760 interface(REG_INTER); 2761 %} 2762 2763 // Replaces legVec during post-selection cleanup. See above. 2764 operand legVecD() %{ 2765 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2766 match(VecD); 2767 2768 format %{ %} 2769 interface(REG_INTER); 2770 %} 2771 2772 // Replaces vec during post-selection cleanup. See above. 2773 operand vecX() %{ 2774 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2775 match(VecX); 2776 2777 format %{ %} 2778 interface(REG_INTER); 2779 %} 2780 2781 // Replaces legVec during post-selection cleanup. See above. 2782 operand legVecX() %{ 2783 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2784 match(VecX); 2785 2786 format %{ %} 2787 interface(REG_INTER); 2788 %} 2789 2790 // Replaces vec during post-selection cleanup. See above. 2791 operand vecY() %{ 2792 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2793 match(VecY); 2794 2795 format %{ %} 2796 interface(REG_INTER); 2797 %} 2798 2799 // Replaces legVec during post-selection cleanup. See above. 2800 operand legVecY() %{ 2801 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2802 match(VecY); 2803 2804 format %{ %} 2805 interface(REG_INTER); 2806 %} 2807 2808 // Replaces vec during post-selection cleanup. See above. 
2809 operand vecZ() %{ 2810 constraint(ALLOC_IN_RC(vectorz_reg)); 2811 match(VecZ); 2812 2813 format %{ %} 2814 interface(REG_INTER); 2815 %} 2816 2817 // Replaces legVec during post-selection cleanup. See above. 2818 operand legVecZ() %{ 2819 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2820 match(VecZ); 2821 2822 format %{ %} 2823 interface(REG_INTER); 2824 %} 2825 2826 // Comparison Code for FP conditional move 2827 operand cmpOp_vcmppd() %{ 2828 match(Bool); 2829 2830 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2831 n->as_Bool()->_test._test != BoolTest::no_overflow); 2832 format %{ "" %} 2833 interface(COND_INTER) %{ 2834 equal (0x0, "eq"); 2835 less (0x1, "lt"); 2836 less_equal (0x2, "le"); 2837 not_equal (0xC, "ne"); 2838 greater_equal(0xD, "ge"); 2839 greater (0xE, "gt"); 2840 //TODO cannot compile (adlc breaks) without two next lines with error: 2841 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2842 // equal' for overflow. 2843 overflow (0x20, "o"); // not really supported by the instruction 2844 no_overflow (0x21, "no"); // not really supported by the instruction 2845 %} 2846 %} 2847 2848 2849 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2850 2851 // ============================================================================ 2852 2853 instruct ShouldNotReachHere() %{ 2854 match(Halt); 2855 format %{ "stop\t# ShouldNotReachHere" %} 2856 ins_encode %{ 2857 if (is_reachable()) { 2858 __ stop(_halt_reason); 2859 } 2860 %} 2861 ins_pipe(pipe_slow); 2862 %} 2863 2864 // =================================EVEX special=============================== 2865 // Existing partial implementation for post-loop multi-versioning computes 2866 // the mask corresponding to tail loop in K1 opmask register. This may then be 2867 // used for predicating instructions in loop body during last post-loop iteration. 2868 // TODO: Remove hard-coded K1 usage while fixing existing post-loop 2869 // multiversioning support. 
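// Conceptually, for a post loop with n scalar iterations remaining the mask
// ends up with its n low bits set, so EVEX instructions predicated on K1 only
// write the surviving lanes (a rough description; see the setvectmask
// implementation for the exact computation).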
2870 instruct setMask(rRegI dst, rRegI src, kReg_K1 mask) %{ 2871 predicate(PostLoopMultiversioning && Matcher::has_predicated_vectors()); 2872 match(Set dst (SetVectMaskI src)); 2873 effect(TEMP dst); 2874 format %{ "setvectmask $dst, $src" %} 2875 ins_encode %{ 2876 __ setvectmask($dst$$Register, $src$$Register, $mask$$KRegister); 2877 %} 2878 ins_pipe(pipe_slow); 2879 %} 2880 2881 // ============================================================================ 2882 2883 instruct addF_reg(regF dst, regF src) %{ 2884 predicate((UseSSE>=1) && (UseAVX == 0)); 2885 match(Set dst (AddF dst src)); 2886 2887 format %{ "addss $dst, $src" %} 2888 ins_cost(150); 2889 ins_encode %{ 2890 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2891 %} 2892 ins_pipe(pipe_slow); 2893 %} 2894 2895 instruct addF_mem(regF dst, memory src) %{ 2896 predicate((UseSSE>=1) && (UseAVX == 0)); 2897 match(Set dst (AddF dst (LoadF src))); 2898 2899 format %{ "addss $dst, $src" %} 2900 ins_cost(150); 2901 ins_encode %{ 2902 __ addss($dst$$XMMRegister, $src$$Address); 2903 %} 2904 ins_pipe(pipe_slow); 2905 %} 2906 2907 instruct addF_imm(regF dst, immF con) %{ 2908 predicate((UseSSE>=1) && (UseAVX == 0)); 2909 match(Set dst (AddF dst con)); 2910 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2911 ins_cost(150); 2912 ins_encode %{ 2913 __ addss($dst$$XMMRegister, $constantaddress($con)); 2914 %} 2915 ins_pipe(pipe_slow); 2916 %} 2917 2918 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2919 predicate(UseAVX > 0); 2920 match(Set dst (AddF src1 src2)); 2921 2922 format %{ "vaddss $dst, $src1, $src2" %} 2923 ins_cost(150); 2924 ins_encode %{ 2925 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2926 %} 2927 ins_pipe(pipe_slow); 2928 %} 2929 2930 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2931 predicate(UseAVX > 0); 2932 match(Set dst (AddF src1 (LoadF src2))); 2933 2934 format %{ "vaddss $dst, $src1, $src2" %} 2935 ins_cost(150); 2936 ins_encode %{ 2937 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2938 %} 2939 ins_pipe(pipe_slow); 2940 %} 2941 2942 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2943 predicate(UseAVX > 0); 2944 match(Set dst (AddF src con)); 2945 2946 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2947 ins_cost(150); 2948 ins_encode %{ 2949 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2950 %} 2951 ins_pipe(pipe_slow); 2952 %} 2953 2954 instruct addD_reg(regD dst, regD src) %{ 2955 predicate((UseSSE>=2) && (UseAVX == 0)); 2956 match(Set dst (AddD dst src)); 2957 2958 format %{ "addsd $dst, $src" %} 2959 ins_cost(150); 2960 ins_encode %{ 2961 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2962 %} 2963 ins_pipe(pipe_slow); 2964 %} 2965 2966 instruct addD_mem(regD dst, memory src) %{ 2967 predicate((UseSSE>=2) && (UseAVX == 0)); 2968 match(Set dst (AddD dst (LoadD src))); 2969 2970 format %{ "addsd $dst, $src" %} 2971 ins_cost(150); 2972 ins_encode %{ 2973 __ addsd($dst$$XMMRegister, $src$$Address); 2974 %} 2975 ins_pipe(pipe_slow); 2976 %} 2977 2978 instruct addD_imm(regD dst, immD con) %{ 2979 predicate((UseSSE>=2) && (UseAVX == 0)); 2980 match(Set dst (AddD dst con)); 2981 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 2982 ins_cost(150); 2983 ins_encode %{ 2984 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2985 %} 2986 ins_pipe(pipe_slow); 2987 %} 2988 2989 instruct 
addD_reg_reg(regD dst, regD src1, regD src2) %{ 2990 predicate(UseAVX > 0); 2991 match(Set dst (AddD src1 src2)); 2992 2993 format %{ "vaddsd $dst, $src1, $src2" %} 2994 ins_cost(150); 2995 ins_encode %{ 2996 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2997 %} 2998 ins_pipe(pipe_slow); 2999 %} 3000 3001 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3002 predicate(UseAVX > 0); 3003 match(Set dst (AddD src1 (LoadD src2))); 3004 3005 format %{ "vaddsd $dst, $src1, $src2" %} 3006 ins_cost(150); 3007 ins_encode %{ 3008 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3009 %} 3010 ins_pipe(pipe_slow); 3011 %} 3012 3013 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3014 predicate(UseAVX > 0); 3015 match(Set dst (AddD src con)); 3016 3017 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3018 ins_cost(150); 3019 ins_encode %{ 3020 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3021 %} 3022 ins_pipe(pipe_slow); 3023 %} 3024 3025 instruct subF_reg(regF dst, regF src) %{ 3026 predicate((UseSSE>=1) && (UseAVX == 0)); 3027 match(Set dst (SubF dst src)); 3028 3029 format %{ "subss $dst, $src" %} 3030 ins_cost(150); 3031 ins_encode %{ 3032 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3033 %} 3034 ins_pipe(pipe_slow); 3035 %} 3036 3037 instruct subF_mem(regF dst, memory src) %{ 3038 predicate((UseSSE>=1) && (UseAVX == 0)); 3039 match(Set dst (SubF dst (LoadF src))); 3040 3041 format %{ "subss $dst, $src" %} 3042 ins_cost(150); 3043 ins_encode %{ 3044 __ subss($dst$$XMMRegister, $src$$Address); 3045 %} 3046 ins_pipe(pipe_slow); 3047 %} 3048 3049 instruct subF_imm(regF dst, immF con) %{ 3050 predicate((UseSSE>=1) && (UseAVX == 0)); 3051 match(Set dst (SubF dst con)); 3052 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3053 ins_cost(150); 3054 ins_encode %{ 3055 __ subss($dst$$XMMRegister, $constantaddress($con)); 3056 %} 3057 ins_pipe(pipe_slow); 3058 %} 3059 3060 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3061 predicate(UseAVX > 0); 3062 match(Set dst (SubF src1 src2)); 3063 3064 format %{ "vsubss $dst, $src1, $src2" %} 3065 ins_cost(150); 3066 ins_encode %{ 3067 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3068 %} 3069 ins_pipe(pipe_slow); 3070 %} 3071 3072 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3073 predicate(UseAVX > 0); 3074 match(Set dst (SubF src1 (LoadF src2))); 3075 3076 format %{ "vsubss $dst, $src1, $src2" %} 3077 ins_cost(150); 3078 ins_encode %{ 3079 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3080 %} 3081 ins_pipe(pipe_slow); 3082 %} 3083 3084 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3085 predicate(UseAVX > 0); 3086 match(Set dst (SubF src con)); 3087 3088 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3089 ins_cost(150); 3090 ins_encode %{ 3091 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3092 %} 3093 ins_pipe(pipe_slow); 3094 %} 3095 3096 instruct subD_reg(regD dst, regD src) %{ 3097 predicate((UseSSE>=2) && (UseAVX == 0)); 3098 match(Set dst (SubD dst src)); 3099 3100 format %{ "subsd $dst, $src" %} 3101 ins_cost(150); 3102 ins_encode %{ 3103 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3104 %} 3105 ins_pipe(pipe_slow); 3106 %} 3107 3108 instruct subD_mem(regD dst, memory src) %{ 3109 predicate((UseSSE>=2) && (UseAVX == 0)); 3110 match(Set dst 
(SubD dst (LoadD src))); 3111 3112 format %{ "subsd $dst, $src" %} 3113 ins_cost(150); 3114 ins_encode %{ 3115 __ subsd($dst$$XMMRegister, $src$$Address); 3116 %} 3117 ins_pipe(pipe_slow); 3118 %} 3119 3120 instruct subD_imm(regD dst, immD con) %{ 3121 predicate((UseSSE>=2) && (UseAVX == 0)); 3122 match(Set dst (SubD dst con)); 3123 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3124 ins_cost(150); 3125 ins_encode %{ 3126 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3127 %} 3128 ins_pipe(pipe_slow); 3129 %} 3130 3131 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3132 predicate(UseAVX > 0); 3133 match(Set dst (SubD src1 src2)); 3134 3135 format %{ "vsubsd $dst, $src1, $src2" %} 3136 ins_cost(150); 3137 ins_encode %{ 3138 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3139 %} 3140 ins_pipe(pipe_slow); 3141 %} 3142 3143 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3144 predicate(UseAVX > 0); 3145 match(Set dst (SubD src1 (LoadD src2))); 3146 3147 format %{ "vsubsd $dst, $src1, $src2" %} 3148 ins_cost(150); 3149 ins_encode %{ 3150 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3151 %} 3152 ins_pipe(pipe_slow); 3153 %} 3154 3155 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3156 predicate(UseAVX > 0); 3157 match(Set dst (SubD src con)); 3158 3159 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3160 ins_cost(150); 3161 ins_encode %{ 3162 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3163 %} 3164 ins_pipe(pipe_slow); 3165 %} 3166 3167 instruct mulF_reg(regF dst, regF src) %{ 3168 predicate((UseSSE>=1) && (UseAVX == 0)); 3169 match(Set dst (MulF dst src)); 3170 3171 format %{ "mulss $dst, $src" %} 3172 ins_cost(150); 3173 ins_encode %{ 3174 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3175 %} 3176 ins_pipe(pipe_slow); 3177 %} 3178 3179 instruct mulF_mem(regF dst, memory src) %{ 3180 predicate((UseSSE>=1) && (UseAVX == 0)); 3181 match(Set dst (MulF dst (LoadF src))); 3182 3183 format %{ "mulss $dst, $src" %} 3184 ins_cost(150); 3185 ins_encode %{ 3186 __ mulss($dst$$XMMRegister, $src$$Address); 3187 %} 3188 ins_pipe(pipe_slow); 3189 %} 3190 3191 instruct mulF_imm(regF dst, immF con) %{ 3192 predicate((UseSSE>=1) && (UseAVX == 0)); 3193 match(Set dst (MulF dst con)); 3194 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3195 ins_cost(150); 3196 ins_encode %{ 3197 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3198 %} 3199 ins_pipe(pipe_slow); 3200 %} 3201 3202 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3203 predicate(UseAVX > 0); 3204 match(Set dst (MulF src1 src2)); 3205 3206 format %{ "vmulss $dst, $src1, $src2" %} 3207 ins_cost(150); 3208 ins_encode %{ 3209 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3210 %} 3211 ins_pipe(pipe_slow); 3212 %} 3213 3214 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3215 predicate(UseAVX > 0); 3216 match(Set dst (MulF src1 (LoadF src2))); 3217 3218 format %{ "vmulss $dst, $src1, $src2" %} 3219 ins_cost(150); 3220 ins_encode %{ 3221 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3222 %} 3223 ins_pipe(pipe_slow); 3224 %} 3225 3226 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3227 predicate(UseAVX > 0); 3228 match(Set dst (MulF src con)); 3229 3230 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3231 
ins_cost(150); 3232 ins_encode %{ 3233 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3234 %} 3235 ins_pipe(pipe_slow); 3236 %} 3237 3238 instruct mulD_reg(regD dst, regD src) %{ 3239 predicate((UseSSE>=2) && (UseAVX == 0)); 3240 match(Set dst (MulD dst src)); 3241 3242 format %{ "mulsd $dst, $src" %} 3243 ins_cost(150); 3244 ins_encode %{ 3245 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3246 %} 3247 ins_pipe(pipe_slow); 3248 %} 3249 3250 instruct mulD_mem(regD dst, memory src) %{ 3251 predicate((UseSSE>=2) && (UseAVX == 0)); 3252 match(Set dst (MulD dst (LoadD src))); 3253 3254 format %{ "mulsd $dst, $src" %} 3255 ins_cost(150); 3256 ins_encode %{ 3257 __ mulsd($dst$$XMMRegister, $src$$Address); 3258 %} 3259 ins_pipe(pipe_slow); 3260 %} 3261 3262 instruct mulD_imm(regD dst, immD con) %{ 3263 predicate((UseSSE>=2) && (UseAVX == 0)); 3264 match(Set dst (MulD dst con)); 3265 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3266 ins_cost(150); 3267 ins_encode %{ 3268 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3269 %} 3270 ins_pipe(pipe_slow); 3271 %} 3272 3273 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3274 predicate(UseAVX > 0); 3275 match(Set dst (MulD src1 src2)); 3276 3277 format %{ "vmulsd $dst, $src1, $src2" %} 3278 ins_cost(150); 3279 ins_encode %{ 3280 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3281 %} 3282 ins_pipe(pipe_slow); 3283 %} 3284 3285 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3286 predicate(UseAVX > 0); 3287 match(Set dst (MulD src1 (LoadD src2))); 3288 3289 format %{ "vmulsd $dst, $src1, $src2" %} 3290 ins_cost(150); 3291 ins_encode %{ 3292 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3293 %} 3294 ins_pipe(pipe_slow); 3295 %} 3296 3297 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3298 predicate(UseAVX > 0); 3299 match(Set dst (MulD src con)); 3300 3301 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3302 ins_cost(150); 3303 ins_encode %{ 3304 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3305 %} 3306 ins_pipe(pipe_slow); 3307 %} 3308 3309 instruct divF_reg(regF dst, regF src) %{ 3310 predicate((UseSSE>=1) && (UseAVX == 0)); 3311 match(Set dst (DivF dst src)); 3312 3313 format %{ "divss $dst, $src" %} 3314 ins_cost(150); 3315 ins_encode %{ 3316 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3317 %} 3318 ins_pipe(pipe_slow); 3319 %} 3320 3321 instruct divF_mem(regF dst, memory src) %{ 3322 predicate((UseSSE>=1) && (UseAVX == 0)); 3323 match(Set dst (DivF dst (LoadF src))); 3324 3325 format %{ "divss $dst, $src" %} 3326 ins_cost(150); 3327 ins_encode %{ 3328 __ divss($dst$$XMMRegister, $src$$Address); 3329 %} 3330 ins_pipe(pipe_slow); 3331 %} 3332 3333 instruct divF_imm(regF dst, immF con) %{ 3334 predicate((UseSSE>=1) && (UseAVX == 0)); 3335 match(Set dst (DivF dst con)); 3336 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3337 ins_cost(150); 3338 ins_encode %{ 3339 __ divss($dst$$XMMRegister, $constantaddress($con)); 3340 %} 3341 ins_pipe(pipe_slow); 3342 %} 3343 3344 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3345 predicate(UseAVX > 0); 3346 match(Set dst (DivF src1 src2)); 3347 3348 format %{ "vdivss $dst, $src1, $src2" %} 3349 ins_cost(150); 3350 ins_encode %{ 3351 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3352 %} 3353 ins_pipe(pipe_slow); 3354 %} 3355 
3356 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3357 predicate(UseAVX > 0); 3358 match(Set dst (DivF src1 (LoadF src2))); 3359 3360 format %{ "vdivss $dst, $src1, $src2" %} 3361 ins_cost(150); 3362 ins_encode %{ 3363 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3364 %} 3365 ins_pipe(pipe_slow); 3366 %} 3367 3368 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3369 predicate(UseAVX > 0); 3370 match(Set dst (DivF src con)); 3371 3372 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3373 ins_cost(150); 3374 ins_encode %{ 3375 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3376 %} 3377 ins_pipe(pipe_slow); 3378 %} 3379 3380 instruct divD_reg(regD dst, regD src) %{ 3381 predicate((UseSSE>=2) && (UseAVX == 0)); 3382 match(Set dst (DivD dst src)); 3383 3384 format %{ "divsd $dst, $src" %} 3385 ins_cost(150); 3386 ins_encode %{ 3387 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3388 %} 3389 ins_pipe(pipe_slow); 3390 %} 3391 3392 instruct divD_mem(regD dst, memory src) %{ 3393 predicate((UseSSE>=2) && (UseAVX == 0)); 3394 match(Set dst (DivD dst (LoadD src))); 3395 3396 format %{ "divsd $dst, $src" %} 3397 ins_cost(150); 3398 ins_encode %{ 3399 __ divsd($dst$$XMMRegister, $src$$Address); 3400 %} 3401 ins_pipe(pipe_slow); 3402 %} 3403 3404 instruct divD_imm(regD dst, immD con) %{ 3405 predicate((UseSSE>=2) && (UseAVX == 0)); 3406 match(Set dst (DivD dst con)); 3407 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3408 ins_cost(150); 3409 ins_encode %{ 3410 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3411 %} 3412 ins_pipe(pipe_slow); 3413 %} 3414 3415 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3416 predicate(UseAVX > 0); 3417 match(Set dst (DivD src1 src2)); 3418 3419 format %{ "vdivsd $dst, $src1, $src2" %} 3420 ins_cost(150); 3421 ins_encode %{ 3422 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3423 %} 3424 ins_pipe(pipe_slow); 3425 %} 3426 3427 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3428 predicate(UseAVX > 0); 3429 match(Set dst (DivD src1 (LoadD src2))); 3430 3431 format %{ "vdivsd $dst, $src1, $src2" %} 3432 ins_cost(150); 3433 ins_encode %{ 3434 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3435 %} 3436 ins_pipe(pipe_slow); 3437 %} 3438 3439 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3440 predicate(UseAVX > 0); 3441 match(Set dst (DivD src con)); 3442 3443 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3444 ins_cost(150); 3445 ins_encode %{ 3446 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3447 %} 3448 ins_pipe(pipe_slow); 3449 %} 3450 3451 instruct absF_reg(regF dst) %{ 3452 predicate((UseSSE>=1) && (UseAVX == 0)); 3453 match(Set dst (AbsF dst)); 3454 ins_cost(150); 3455 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3456 ins_encode %{ 3457 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3458 %} 3459 ins_pipe(pipe_slow); 3460 %} 3461 3462 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3463 predicate(UseAVX > 0); 3464 match(Set dst (AbsF src)); 3465 ins_cost(150); 3466 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3467 ins_encode %{ 3468 int vlen_enc = Assembler::AVX_128bit; 3469 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3470 ExternalAddress(float_signmask()), vlen_enc); 3471 %} 3472 
ins_pipe(pipe_slow); 3473 %} 3474 3475 instruct absD_reg(regD dst) %{ 3476 predicate((UseSSE>=2) && (UseAVX == 0)); 3477 match(Set dst (AbsD dst)); 3478 ins_cost(150); 3479 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3480 "# abs double by sign masking" %} 3481 ins_encode %{ 3482 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3483 %} 3484 ins_pipe(pipe_slow); 3485 %} 3486 3487 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3488 predicate(UseAVX > 0); 3489 match(Set dst (AbsD src)); 3490 ins_cost(150); 3491 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3492 "# abs double by sign masking" %} 3493 ins_encode %{ 3494 int vlen_enc = Assembler::AVX_128bit; 3495 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3496 ExternalAddress(double_signmask()), vlen_enc); 3497 %} 3498 ins_pipe(pipe_slow); 3499 %} 3500 3501 instruct negF_reg(regF dst) %{ 3502 predicate((UseSSE>=1) && (UseAVX == 0)); 3503 match(Set dst (NegF dst)); 3504 ins_cost(150); 3505 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3506 ins_encode %{ 3507 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3508 %} 3509 ins_pipe(pipe_slow); 3510 %} 3511 3512 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3513 predicate(UseAVX > 0); 3514 match(Set dst (NegF src)); 3515 ins_cost(150); 3516 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3517 ins_encode %{ 3518 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3519 ExternalAddress(float_signflip())); 3520 %} 3521 ins_pipe(pipe_slow); 3522 %} 3523 3524 instruct negD_reg(regD dst) %{ 3525 predicate((UseSSE>=2) && (UseAVX == 0)); 3526 match(Set dst (NegD dst)); 3527 ins_cost(150); 3528 format %{ "xorpd $dst, [0x8000000000000000]\t" 3529 "# neg double by sign flipping" %} 3530 ins_encode %{ 3531 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3532 %} 3533 ins_pipe(pipe_slow); 3534 %} 3535 3536 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3537 predicate(UseAVX > 0); 3538 match(Set dst (NegD src)); 3539 ins_cost(150); 3540 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3541 "# neg double by sign flipping" %} 3542 ins_encode %{ 3543 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3544 ExternalAddress(double_signflip())); 3545 %} 3546 ins_pipe(pipe_slow); 3547 %} 3548 3549 // The sqrtss instruction needs its destination register to be pre-initialized for best performance: it writes only the low 32 bits of dst, so a cold dst would carry a false dependency on its previous contents. 3550 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below. 3551 instruct sqrtF_reg(regF dst) %{ 3552 predicate(UseSSE>=1); 3553 match(Set dst (SqrtF dst)); 3554 format %{ "sqrtss $dst, $dst" %} 3555 ins_encode %{ 3556 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3557 %} 3558 ins_pipe(pipe_slow); 3559 %} 3560 3561 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance: it writes only the low 64 bits of dst, so a cold dst would carry a false dependency on its previous contents. 3562 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below. 3563 instruct sqrtD_reg(regD dst) %{ 3564 predicate(UseSSE>=2); 3565 match(Set dst (SqrtD dst)); 3566 format %{ "sqrtsd $dst, $dst" %} 3567 ins_encode %{ 3568 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3569 %} 3570 ins_pipe(pipe_slow); 3571 %} 3572 3573 3574 // ---------------------------------------- VectorReinterpret ------------------------------------ 3575 instruct reinterpret_mask(kReg dst) %{ 3576 predicate(n->bottom_type()->isa_vectmask() && 3577 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3578 match(Set dst
(VectorReinterpret dst)); 3579 ins_cost(125); 3580 format %{ "vector_reinterpret $dst\t!" %} 3581 ins_encode %{ 3582 // empty 3583 %} 3584 ins_pipe( pipe_slow ); 3585 %} 3586 3587 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3588 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3589 n->bottom_type()->isa_vectmask() && 3590 n->in(1)->bottom_type()->isa_vectmask() && 3591 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3592 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3593 match(Set dst (VectorReinterpret src)); 3594 effect(TEMP xtmp); 3595 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3596 ins_encode %{ 3597 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3598 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3599 assert(src_sz == dst_sz , "src and dst size mismatch"); 3600 int vlen_enc = vector_length_encoding(src_sz); 3601 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3602 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3603 %} 3604 ins_pipe( pipe_slow ); 3605 %} 3606 3607 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3608 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3609 n->bottom_type()->isa_vectmask() && 3610 n->in(1)->bottom_type()->isa_vectmask() && 3611 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3612 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3613 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3614 match(Set dst (VectorReinterpret src)); 3615 effect(TEMP xtmp); 3616 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3617 ins_encode %{ 3618 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3619 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3620 assert(src_sz == dst_sz , "src and dst size mismatch"); 3621 int vlen_enc = vector_length_encoding(src_sz); 3622 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3623 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3624 %} 3625 ins_pipe( pipe_slow ); 3626 %} 3627 3628 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3629 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3630 n->bottom_type()->isa_vectmask() && 3631 n->in(1)->bottom_type()->isa_vectmask() && 3632 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3633 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3634 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3635 match(Set dst (VectorReinterpret src)); 3636 effect(TEMP xtmp); 3637 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3638 ins_encode %{ 3639 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3640 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3641 assert(src_sz == dst_sz , "src and dst size mismatch"); 3642 int vlen_enc = vector_length_encoding(src_sz); 3643 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3644 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3645 %} 3646 ins_pipe( pipe_slow ); 3647 %} 3648 3649 instruct reinterpret(vec dst) %{ 3650 predicate(!n->bottom_type()->isa_vectmask() && 3651 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3652 match(Set dst (VectorReinterpret dst)); 3653 ins_cost(125); 3654 format %{ "vector_reinterpret $dst\t!" %} 3655 ins_encode %{ 3656 // empty 3657 %} 3658 ins_pipe( pipe_slow ); 3659 %} 3660 3661 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ 3662 predicate(UseAVX == 0 && 3663 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3664 match(Set dst (VectorReinterpret src)); 3665 ins_cost(125); 3666 effect(TEMP dst, TEMP scratch); 3667 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3668 ins_encode %{ 3669 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3670 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3671 3672 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3673 if (src_vlen_in_bytes == 4) { 3674 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 3675 } else { 3676 assert(src_vlen_in_bytes == 8, ""); 3677 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3678 } 3679 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3680 %} 3681 ins_pipe( pipe_slow ); 3682 %} 3683 3684 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ 3685 predicate(UseAVX > 0 && 3686 !n->bottom_type()->isa_vectmask() && 3687 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3688 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3689 match(Set dst (VectorReinterpret src)); 3690 ins_cost(125); 3691 effect(TEMP scratch); 3692 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3693 ins_encode %{ 3694 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); 3695 %} 3696 ins_pipe( pipe_slow ); 3697 %} 3698 3699 3700 instruct vreinterpret_expand(legVec dst, vec src) %{ 3701 predicate(UseAVX > 0 && 3702 !n->bottom_type()->isa_vectmask() && 3703 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3704 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3705 match(Set dst (VectorReinterpret src)); 3706 ins_cost(125); 3707 format %{ "vector_reinterpret_expand $dst,$src\t!" 
%} 3708 ins_encode %{ 3709 switch (Matcher::vector_length_in_bytes(this, $src)) { 3710 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3711 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3712 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3713 default: ShouldNotReachHere(); 3714 } 3715 %} 3716 ins_pipe( pipe_slow ); 3717 %} 3718 3719 instruct reinterpret_shrink(vec dst, legVec src) %{ 3720 predicate(!n->bottom_type()->isa_vectmask() && 3721 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3722 match(Set dst (VectorReinterpret src)); 3723 ins_cost(125); 3724 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3725 ins_encode %{ 3726 switch (Matcher::vector_length_in_bytes(this)) { 3727 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3728 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3729 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3730 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3731 default: ShouldNotReachHere(); 3732 } 3733 %} 3734 ins_pipe( pipe_slow ); 3735 %} 3736 3737 // ---------------------------------------------------------------------------------------------------- 3738 3739 #ifdef _LP64 3740 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3741 match(Set dst (RoundDoubleMode src rmode)); 3742 format %{ "roundsd $dst,$src" %} 3743 ins_cost(150); 3744 ins_encode %{ 3745 assert(UseSSE >= 4, "required"); 3746 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3747 %} 3748 ins_pipe(pipe_slow); 3749 %} 3750 3751 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3752 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3753 format %{ "roundsd $dst,$src" %} 3754 ins_cost(150); 3755 ins_encode %{ 3756 assert(UseSSE >= 4, "required"); 3757 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3758 %} 3759 ins_pipe(pipe_slow); 3760 %} 3761 3762 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3763 match(Set dst (RoundDoubleMode con rmode)); 3764 effect(TEMP scratch_reg); 3765 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3766 ins_cost(150); 3767 ins_encode %{ 3768 assert(UseSSE >= 4, "required"); 3769 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3770 %} 3771 ins_pipe(pipe_slow); 3772 %} 3773 3774 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3775 predicate(Matcher::vector_length(n) < 8); 3776 match(Set dst (RoundDoubleModeV src rmode)); 3777 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3778 ins_encode %{ 3779 assert(UseAVX > 0, "required"); 3780 int vlen_enc = vector_length_encoding(this); 3781 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3782 %} 3783 ins_pipe( pipe_slow ); 3784 %} 3785 3786 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3787 predicate(Matcher::vector_length(n) == 8); 3788 match(Set dst (RoundDoubleModeV src rmode)); 3789 format %{ "vrndscalepd $dst,$src,$rmode\t! 
round packed8D" %} 3790 ins_encode %{ 3791 assert(UseAVX > 2, "required"); 3792 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3793 %} 3794 ins_pipe( pipe_slow ); 3795 %} 3796 3797 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3798 predicate(Matcher::vector_length(n) < 8); 3799 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3800 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3801 ins_encode %{ 3802 assert(UseAVX > 0, "required"); 3803 int vlen_enc = vector_length_encoding(this); 3804 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3805 %} 3806 ins_pipe( pipe_slow ); 3807 %} 3808 3809 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3810 predicate(Matcher::vector_length(n) == 8); 3811 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3812 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3813 ins_encode %{ 3814 assert(UseAVX > 2, "required"); 3815 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3816 %} 3817 ins_pipe( pipe_slow ); 3818 %} 3819 #endif // _LP64 3820 3821 instruct onspinwait() %{ 3822 match(OnSpinWait); 3823 ins_cost(200); 3824 3825 format %{ 3826 $$template 3827 $$emit$$"pause\t! membar_onspinwait" 3828 %} 3829 ins_encode %{ 3830 __ pause(); 3831 %} 3832 ins_pipe(pipe_slow); 3833 %} 3834 3835 // a * b + c 3836 instruct fmaD_reg(regD a, regD b, regD c) %{ 3837 predicate(UseFMA); 3838 match(Set c (FmaD c (Binary a b))); 3839 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3840 ins_cost(150); 3841 ins_encode %{ 3842 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3843 %} 3844 ins_pipe( pipe_slow ); 3845 %} 3846 3847 // a * b + c 3848 instruct fmaF_reg(regF a, regF b, regF c) %{ 3849 predicate(UseFMA); 3850 match(Set c (FmaF c (Binary a b))); 3851 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3852 ins_cost(150); 3853 ins_encode %{ 3854 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3855 %} 3856 ins_pipe( pipe_slow ); 3857 %} 3858 3859 // ====================VECTOR INSTRUCTIONS===================================== 3860 3861 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3862 instruct MoveVec2Leg(legVec dst, vec src) %{ 3863 match(Set dst src); 3864 format %{ "" %} 3865 ins_encode %{ 3866 ShouldNotReachHere(); 3867 %} 3868 ins_pipe( fpu_reg_reg ); 3869 %} 3870 3871 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3872 match(Set dst src); 3873 format %{ "" %} 3874 ins_encode %{ 3875 ShouldNotReachHere(); 3876 %} 3877 ins_pipe( fpu_reg_reg ); 3878 %} 3879 3880 // ============================================================================ 3881 3882 // Load vectors generic operand pattern 3883 instruct loadV(vec dst, memory mem) %{ 3884 match(Set dst (LoadVector mem)); 3885 ins_cost(125); 3886 format %{ "load_vector $dst,$mem" %} 3887 ins_encode %{ 3888 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3889 %} 3890 ins_pipe( pipe_slow ); 3891 %} 3892 3893 // Store vectors generic operand pattern. 
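// As with the generic load above, the move width below is selected from the vector size in
// bytes; the unaligned encodings (movdqu/vmovdqu/evmovdqul) are used, so the memory operand
// is not required to be 16/32/64-byte aligned. (The load side presumably performs the same
// size dispatch inside the load_vector macro-assembler helper.)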
3894 instruct storeV(memory mem, vec src) %{ 3895 match(Set mem (StoreVector mem src)); 3896 ins_cost(145); 3897 format %{ "store_vector $mem,$src\n\t" %} 3898 ins_encode %{ 3899 switch (Matcher::vector_length_in_bytes(this, $src)) { 3900 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3901 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3902 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3903 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3904 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3905 default: ShouldNotReachHere(); 3906 } 3907 %} 3908 ins_pipe( pipe_slow ); 3909 %} 3910 3911 // ---------------------------------------- Gather ------------------------------------ 3912 3913 // Gather INT, LONG, FLOAT, DOUBLE 3914 3915 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3916 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 3917 match(Set dst (LoadVectorGather mem idx)); 3918 effect(TEMP dst, TEMP tmp, TEMP mask); 3919 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 3920 ins_encode %{ 3921 assert(UseAVX >= 2, "sanity"); 3922 3923 int vlen_enc = vector_length_encoding(this); 3924 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3925 3926 assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity"); 3927 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3928 3929 if (vlen_enc == Assembler::AVX_128bit) { 3930 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3931 } else { 3932 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3933 } 3934 __ lea($tmp$$Register, $mem$$Address); 3935 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3936 %} 3937 ins_pipe( pipe_slow ); 3938 %} 3939 3940 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 3941 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 3942 match(Set dst (LoadVectorGather mem idx)); 3943 effect(TEMP dst, TEMP tmp, TEMP ktmp); 3944 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 3945 ins_encode %{ 3946 assert(UseAVX > 2, "sanity"); 3947 3948 int vlen_enc = vector_length_encoding(this); 3949 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3950 3951 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3952 3953 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3954 __ lea($tmp$$Register, $mem$$Address); 3955 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3956 %} 3957 ins_pipe( pipe_slow ); 3958 %} 3959 3960 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 3961 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 3962 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 3963 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! 
using $tmp and ktmp as TEMP" %} 3964 ins_encode %{ 3965 assert(UseAVX > 2, "sanity"); 3966 int vlen_enc = vector_length_encoding(this); 3967 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3968 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3969 // Note: Since the gather instruction partially updates the opmask register used 3970 // for predication, the mask operand is moved to a temporary first. 3971 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 3972 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 3973 __ lea($tmp$$Register, $mem$$Address); 3974 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3975 %} 3976 ins_pipe( pipe_slow ); 3977 %} 3978 // ====================Scatter======================================= 3979 3980 // Scatter INT, LONG, FLOAT, DOUBLE 3981 3982 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{ 3983 predicate(UseAVX > 2); 3984 match(Set mem (StoreVectorScatter mem (Binary src idx))); 3985 effect(TEMP tmp, TEMP ktmp); 3986 format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %} 3987 ins_encode %{ 3988 int vlen_enc = vector_length_encoding(this, $src); 3989 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 3990 3991 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 3992 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3993 3994 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3995 __ lea($tmp$$Register, $mem$$Address); 3996 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 3997 %} 3998 ins_pipe( pipe_slow ); 3999 %} 4000 4001 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 4002 match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask)))); 4003 effect(TEMP tmp, TEMP ktmp); 4004 format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %} 4005 ins_encode %{ 4006 int vlen_enc = vector_length_encoding(this, $src); 4007 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src); 4008 assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity"); 4009 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 4010 // Note: Since the scatter instruction partially updates the opmask register used 4011 // for predication, the mask operand is moved to a temporary first.
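// (The EVEX gather/scatter forms clear each opmask bit as the corresponding element
// completes, leaving the mask register zeroed on normal completion; predicating with
// $mask directly would therefore clobber the caller's mask value.)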
4012 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 4013 __ lea($tmp$$Register, $mem$$Address); 4014 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 4019 // ====================REPLICATE======================================= 4020 4021 // Replicate byte scalar to be vector 4022 instruct ReplB_reg(vec dst, rRegI src) %{ 4023 match(Set dst (ReplicateB src)); 4024 format %{ "replicateB $dst,$src" %} 4025 ins_encode %{ 4026 uint vlen = Matcher::vector_length(this); 4027 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4028 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 4029 int vlen_enc = vector_length_encoding(this); 4030 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 4031 } else if (VM_Version::supports_avx2()) { 4032 int vlen_enc = vector_length_encoding(this); 4033 __ movdl($dst$$XMMRegister, $src$$Register); 4034 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4035 } else { 4036 __ movdl($dst$$XMMRegister, $src$$Register); 4037 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4038 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4039 if (vlen >= 16) { 4040 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4041 if (vlen >= 32) { 4042 assert(vlen == 32, "sanity"); 4043 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4044 } 4045 } 4046 } 4047 %} 4048 ins_pipe( pipe_slow ); 4049 %} 4050 4051 instruct ReplB_mem(vec dst, memory mem) %{ 4052 predicate(VM_Version::supports_avx2()); 4053 match(Set dst (ReplicateB (LoadB mem))); 4054 format %{ "replicateB $dst,$mem" %} 4055 ins_encode %{ 4056 int vlen_enc = vector_length_encoding(this); 4057 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4058 %} 4059 ins_pipe( pipe_slow ); 4060 %} 4061 4062 instruct ReplB_imm(vec dst, immI con) %{ 4063 match(Set dst (ReplicateB con)); 4064 format %{ "replicateB $dst,$con" %} 4065 ins_encode %{ 4066 InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this))); 4067 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4068 %} 4069 ins_pipe( pipe_slow ); 4070 %} 4071 4072 // ====================ReplicateS======================================= 4073 4074 instruct ReplS_reg(vec dst, rRegI src) %{ 4075 match(Set dst (ReplicateS src)); 4076 format %{ "replicateS $dst,$src" %} 4077 ins_encode %{ 4078 uint vlen = Matcher::vector_length(this); 4079 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4080 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4081 int vlen_enc = vector_length_encoding(this); 4082 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4083 } else if (VM_Version::supports_avx2()) { 4084 int vlen_enc = vector_length_encoding(this); 4085 __ movdl($dst$$XMMRegister, $src$$Register); 4086 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4087 } else { 4088 __ movdl($dst$$XMMRegister, $src$$Register); 4089 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4090 if (vlen >= 8) { 4091 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4092 if (vlen >= 16) { 4093 assert(vlen == 16, "sanity"); 4094 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4095 } 4096 } 4097 } 4098 %} 4099 ins_pipe( pipe_slow ); 4100 %} 4101 4102 instruct ReplS_mem(vec dst, 
memory mem) %{ 4103 predicate(VM_Version::supports_avx2()); 4104 match(Set dst (ReplicateS (LoadS mem))); 4105 format %{ "replicateS $dst,$mem" %} 4106 ins_encode %{ 4107 int vlen_enc = vector_length_encoding(this); 4108 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4109 %} 4110 ins_pipe( pipe_slow ); 4111 %} 4112 4113 instruct ReplS_imm(vec dst, immI con) %{ 4114 match(Set dst (ReplicateS con)); 4115 format %{ "replicateS $dst,$con" %} 4116 ins_encode %{ 4117 InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this))); 4118 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4119 %} 4120 ins_pipe( pipe_slow ); 4121 %} 4122 4123 // ====================ReplicateI======================================= 4124 4125 instruct ReplI_reg(vec dst, rRegI src) %{ 4126 match(Set dst (ReplicateI src)); 4127 format %{ "replicateI $dst,$src" %} 4128 ins_encode %{ 4129 uint vlen = Matcher::vector_length(this); 4130 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4131 int vlen_enc = vector_length_encoding(this); 4132 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4133 } else if (VM_Version::supports_avx2()) { 4134 int vlen_enc = vector_length_encoding(this); 4135 __ movdl($dst$$XMMRegister, $src$$Register); 4136 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4137 } else { 4138 __ movdl($dst$$XMMRegister, $src$$Register); 4139 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4140 if (vlen >= 8) { 4141 assert(vlen == 8, "sanity"); 4142 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4143 } 4144 } 4145 %} 4146 ins_pipe( pipe_slow ); 4147 %} 4148 4149 instruct ReplI_mem(vec dst, memory mem) %{ 4150 match(Set dst (ReplicateI (LoadI mem))); 4151 format %{ "replicateI $dst,$mem" %} 4152 ins_encode %{ 4153 uint vlen = Matcher::vector_length(this); 4154 if (vlen <= 4) { 4155 __ movdl($dst$$XMMRegister, $mem$$Address); 4156 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4157 } else { 4158 assert(VM_Version::supports_avx2(), "sanity"); 4159 int vlen_enc = vector_length_encoding(this); 4160 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4161 } 4162 %} 4163 ins_pipe( pipe_slow ); 4164 %} 4165 4166 instruct ReplI_imm(vec dst, immI con) %{ 4167 match(Set dst (ReplicateI con)); 4168 format %{ "replicateI $dst,$con" %} 4169 ins_encode %{ 4170 InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this))); 4171 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4172 %} 4173 ins_pipe( pipe_slow ); 4174 %} 4175 4176 // Replicate scalar zero to be vector 4177 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4178 match(Set dst (ReplicateB zero)); 4179 match(Set dst (ReplicateS zero)); 4180 match(Set dst (ReplicateI zero)); 4181 format %{ "replicateI $dst,$zero" %} 4182 ins_encode %{ 4183 uint vsize = Matcher::vector_length_in_bytes(this); 4184 if (vsize <= 16) { 4185 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4186 } else { 4187 int vlen_enc = vector_length_encoding(this); 4188 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4189 } 4190 %} 4191 ins_pipe( fpu_reg_reg ); 4192 %} 4193 4194 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4195 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16); 4196 match(Set dst (ReplicateB con)); 4197 match(Set dst (ReplicateS con)); 4198 match(Set dst (ReplicateI con)); 
4199 effect(TEMP dst); 4200 format %{ "vallones $dst" %} 4201 ins_encode %{ 4202 int vector_len = vector_length_encoding(this); 4203 __ vallones($dst$$XMMRegister, vector_len); 4204 %} 4205 ins_pipe( pipe_slow ); 4206 %} 4207 4208 // ====================ReplicateL======================================= 4209 4210 #ifdef _LP64 4211 // Replicate long (8 byte) scalar to be vector 4212 instruct ReplL_reg(vec dst, rRegL src) %{ 4213 match(Set dst (ReplicateL src)); 4214 format %{ "replicateL $dst,$src" %} 4215 ins_encode %{ 4216 uint vlen = Matcher::vector_length(this); 4217 if (vlen == 2) { 4218 __ movdq($dst$$XMMRegister, $src$$Register); 4219 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4220 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4221 int vlen_enc = vector_length_encoding(this); 4222 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4223 } else if (VM_Version::supports_avx2()) { 4224 assert(vlen == 4, "sanity"); 4225 int vlen_enc = vector_length_encoding(this); 4226 __ movdq($dst$$XMMRegister, $src$$Register); 4227 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4228 } else { 4229 assert(vlen == 4, "sanity"); 4230 __ movdq($dst$$XMMRegister, $src$$Register); 4231 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4232 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4233 } 4234 %} 4235 ins_pipe( pipe_slow ); 4236 %} 4237 #else // _LP64 4238 // Replicate long (8 byte) scalar to be vector 4239 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4240 predicate(Matcher::vector_length(n) <= 4); 4241 match(Set dst (ReplicateL src)); 4242 effect(TEMP dst, USE src, TEMP tmp); 4243 format %{ "replicateL $dst,$src" %} 4244 ins_encode %{ 4245 uint vlen = Matcher::vector_length(this); 4246 if (vlen == 2) { 4247 __ movdl($dst$$XMMRegister, $src$$Register); 4248 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4249 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4250 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4251 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4252 int vlen_enc = Assembler::AVX_256bit; 4253 __ movdl($dst$$XMMRegister, $src$$Register); 4254 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4255 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4256 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4257 } else { 4258 __ movdl($dst$$XMMRegister, $src$$Register); 4259 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4260 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4261 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4262 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4263 } 4264 %} 4265 ins_pipe( pipe_slow ); 4266 %} 4267 4268 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4269 predicate(Matcher::vector_length(n) == 8); 4270 match(Set dst (ReplicateL src)); 4271 effect(TEMP dst, USE src, TEMP tmp); 4272 format %{ "replicateL $dst,$src" %} 4273 ins_encode %{ 4274 if (VM_Version::supports_avx512vl()) { 4275 __ movdl($dst$$XMMRegister, $src$$Register); 4276 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4277 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4278 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4279 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4280 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4281 } else { 4282 int vlen_enc = Assembler::AVX_512bit; 4283 __ movdl($dst$$XMMRegister, 
$src$$Register); 4284 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4285 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4286 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4287 } 4288 %} 4289 ins_pipe( pipe_slow ); 4290 %} 4291 #endif // _LP64 4292 4293 instruct ReplL_mem(vec dst, memory mem) %{ 4294 match(Set dst (ReplicateL (LoadL mem))); 4295 format %{ "replicateL $dst,$mem" %} 4296 ins_encode %{ 4297 uint vlen = Matcher::vector_length(this); 4298 if (vlen == 2) { 4299 __ movq($dst$$XMMRegister, $mem$$Address); 4300 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4301 } else { 4302 assert(VM_Version::supports_avx2(), "sanity"); 4303 int vlen_enc = vector_length_encoding(this); 4304 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4305 } 4306 %} 4307 ins_pipe( pipe_slow ); 4308 %} 4309 4310 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4311 instruct ReplL_imm(vec dst, immL con) %{ 4312 match(Set dst (ReplicateL con)); 4313 format %{ "replicateL $dst,$con" %} 4314 ins_encode %{ 4315 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this))); 4316 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4317 %} 4318 ins_pipe( pipe_slow ); 4319 %} 4320 4321 instruct ReplL_zero(vec dst, immL0 zero) %{ 4322 match(Set dst (ReplicateL zero)); 4323 format %{ "replicateL $dst,$zero" %} 4324 ins_encode %{ 4325 int vlen = Matcher::vector_length(this); 4326 if (vlen == 2) { 4327 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4328 } else { 4329 int vlen_enc = vector_length_encoding(this); 4330 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4331 } 4332 %} 4333 ins_pipe( fpu_reg_reg ); 4334 %} 4335 4336 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4337 predicate(UseAVX > 0); 4338 match(Set dst (ReplicateL con)); 4339 effect(TEMP dst); 4340 format %{ "vallones $dst" %} 4341 ins_encode %{ 4342 int vector_len = vector_length_encoding(this); 4343 __ vallones($dst$$XMMRegister, vector_len); 4344 %} 4345 ins_pipe( pipe_slow ); 4346 %} 4347 4348 // ====================ReplicateF======================================= 4349 4350 instruct ReplF_reg(vec dst, vlRegF src) %{ 4351 match(Set dst (ReplicateF src)); 4352 format %{ "replicateF $dst,$src" %} 4353 ins_encode %{ 4354 uint vlen = Matcher::vector_length(this); 4355 if (vlen <= 4) { 4356 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4357 } else if (VM_Version::supports_avx2()) { 4358 int vlen_enc = vector_length_encoding(this); 4359 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4360 } else { 4361 assert(vlen == 8, "sanity"); 4362 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4363 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4364 } 4365 %} 4366 ins_pipe( pipe_slow ); 4367 %} 4368 4369 instruct ReplF_mem(vec dst, memory mem) %{ 4370 match(Set dst (ReplicateF (LoadF mem))); 4371 format %{ "replicateF $dst,$mem" %} 4372 ins_encode %{ 4373 uint vlen = Matcher::vector_length(this); 4374 if (vlen <= 4) { 4375 __ movdl($dst$$XMMRegister, $mem$$Address); 4376 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4377 } else { 4378 assert(VM_Version::supports_avx(), "sanity"); 4379 int vlen_enc = vector_length_encoding(this); 4380 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4381 } 4382 %} 4383 ins_pipe( pipe_slow ); 4384 %} 4385 4386 // Replicate float scalar 
immediate to be vector by loading from const table. 4387 instruct ReplF_imm(vec dst, immF con) %{ 4388 match(Set dst (ReplicateF con)); 4389 format %{ "replicateF $dst,$con" %} 4390 ins_encode %{ 4391 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this))); 4392 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4393 %} 4394 ins_pipe( pipe_slow ); 4395 %} 4396 4397 instruct ReplF_zero(vec dst, immF0 zero) %{ 4398 match(Set dst (ReplicateF zero)); 4399 format %{ "replicateF $dst,$zero" %} 4400 ins_encode %{ 4401 uint vlen = Matcher::vector_length(this); 4402 if (vlen <= 4) { 4403 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4404 } else { 4405 int vlen_enc = vector_length_encoding(this); 4406 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4407 } 4408 %} 4409 ins_pipe( fpu_reg_reg ); 4410 %} 4411 4412 // ====================ReplicateD======================================= 4413 4414 // Replicate double (8 bytes) scalar to be vector 4415 instruct ReplD_reg(vec dst, vlRegD src) %{ 4416 match(Set dst (ReplicateD src)); 4417 format %{ "replicateD $dst,$src" %} 4418 ins_encode %{ 4419 uint vlen = Matcher::vector_length(this); 4420 if (vlen == 2) { 4421 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4422 } else if (VM_Version::supports_avx2()) { 4423 int vlen_enc = vector_length_encoding(this); 4424 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4425 } else { 4426 assert(vlen == 4, "sanity"); 4427 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4428 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4429 } 4430 %} 4431 ins_pipe( pipe_slow ); 4432 %} 4433 4434 instruct ReplD_mem(vec dst, memory mem) %{ 4435 match(Set dst (ReplicateD (LoadD mem))); 4436 format %{ "replicateD $dst,$mem" %} 4437 ins_encode %{ 4438 uint vlen = Matcher::vector_length(this); 4439 if (vlen == 2) { 4440 __ movq($dst$$XMMRegister, $mem$$Address); 4441 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 4442 } else { 4443 assert(VM_Version::supports_avx(), "sanity"); 4444 int vlen_enc = vector_length_encoding(this); 4445 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4446 } 4447 %} 4448 ins_pipe( pipe_slow ); 4449 %} 4450 4451 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
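// Illustrative example (assuming vreplicate_imm simply replicates the immediate): a
// ReplicateD of con == 2.0 at vector length 4 places { 2.0, 2.0, 2.0, 2.0 } in the
// constant table, and load_vector then fetches it with a single 32-byte load.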
4452 instruct ReplD_imm(vec dst, immD con) %{ 4453 match(Set dst (ReplicateD con)); 4454 format %{ "replicateD $dst,$con" %} 4455 ins_encode %{ 4456 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this))); 4457 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4458 %} 4459 ins_pipe( pipe_slow ); 4460 %} 4461 4462 instruct ReplD_zero(vec dst, immD0 zero) %{ 4463 match(Set dst (ReplicateD zero)); 4464 format %{ "replicateD $dst,$zero" %} 4465 ins_encode %{ 4466 uint vlen = Matcher::vector_length(this); 4467 if (vlen == 2) { 4468 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4469 } else { 4470 int vlen_enc = vector_length_encoding(this); 4471 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4472 } 4473 %} 4474 ins_pipe( fpu_reg_reg ); 4475 %} 4476 4477 // ====================VECTOR INSERT======================================= 4478 4479 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4480 predicate(Matcher::vector_length_in_bytes(n) < 32); 4481 match(Set dst (VectorInsert (Binary dst val) idx)); 4482 format %{ "vector_insert $dst,$val,$idx" %} 4483 ins_encode %{ 4484 assert(UseSSE >= 4, "required"); 4485 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4486 4487 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4488 4489 assert(is_integral_type(elem_bt), ""); 4490 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4491 4492 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4493 %} 4494 ins_pipe( pipe_slow ); 4495 %} 4496 4497 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4498 predicate(Matcher::vector_length_in_bytes(n) == 32); 4499 match(Set dst (VectorInsert (Binary src val) idx)); 4500 effect(TEMP vtmp); 4501 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4502 ins_encode %{ 4503 int vlen_enc = Assembler::AVX_256bit; 4504 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4505 int elem_per_lane = 16/type2aelembytes(elem_bt); 4506 int log2epr = log2(elem_per_lane); 4507 4508 assert(is_integral_type(elem_bt), "sanity"); 4509 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4510 4511 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4512 uint y_idx = ($idx$$constant >> log2epr) & 1; 4513 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4514 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4515 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4516 %} 4517 ins_pipe( pipe_slow ); 4518 %} 4519 4520 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4521 predicate(Matcher::vector_length_in_bytes(n) == 64); 4522 match(Set dst (VectorInsert (Binary src val) idx)); 4523 effect(TEMP vtmp); 4524 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4525 ins_encode %{ 4526 assert(UseAVX > 2, "sanity"); 4527 4528 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4529 int elem_per_lane = 16/type2aelembytes(elem_bt); 4530 int log2epr = log2(elem_per_lane); 4531 4532 assert(is_integral_type(elem_bt), ""); 4533 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4534 4535 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4536 uint y_idx = ($idx$$constant >> log2epr) & 3; 4537 __ vextracti32x4($vtmp$$XMMRegister, 
instruct insert(vec dst, rRegI val, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n) < 32);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_length_in_bytes(this) >= 8, "required");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 32);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_256bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 1;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    int elem_per_lane = 16/type2aelembytes(elem_bt);
    int log2epr = log2(elem_per_lane);

    assert(is_integral_type(elem_bt), "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(log2epr);
    uint y_idx = ($idx$$constant >> log2epr) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2L(vec dst, rRegL val, immU8 idx) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

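// Float and double inserts. Floats go straight into the XMM destination with
// insertps; the destination slot is encoded in bits 5:4 of the immediate,
// hence the (x_idx << 4) below. Doubles have no direct XMM insert in this
// scheme, so the value is first moved to a general-purpose register with movq
// and then inserted with (v)pinsrq.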
instruct insertF(vec dst, regF val, immU8 idx) %{
  predicate(Matcher::vector_length(n) < 8);
  match(Set dst (VectorInsert (Binary dst val) idx));
  format %{ "vector_insert $dst,$val,$idx" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");

    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(2);
    __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4);
  %}
  ins_pipe( pipe_slow );
%}

instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{
  predicate(Matcher::vector_length(n) >= 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    int vlen = Matcher::vector_length(this);
    uint x_idx = $idx$$constant & right_n_bits(2);
    if (vlen == 8) {
      uint y_idx = ($idx$$constant >> 2) & 1;
      int vlen_enc = Assembler::AVX_256bit;
      __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    } else {
      assert(vlen == 16, "sanity");
      uint y_idx = ($idx$$constant >> 2) & 3;
      __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
      __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4);
      __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
    }
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{
  predicate(Matcher::vector_length(n) == 2);
  match(Set dst (VectorInsert (Binary dst val) idx));
  effect(TEMP tmp);
  format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %}
  ins_encode %{
    assert(UseSSE >= 4, "sanity");
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    __ movq($tmp$$Register, $val$$XMMRegister);
    __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{
  predicate(Matcher::vector_length(n) == 4);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP vtmp, TEMP tmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 1;
    int vlen_enc = Assembler::AVX_256bit;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}

instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (VectorInsert (Binary src val) idx));
  effect(TEMP tmp, TEMP vtmp);
  format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %}
  ins_encode %{
    assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity");
    assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds");

    uint x_idx = $idx$$constant & right_n_bits(1);
    uint y_idx = ($idx$$constant >> 1) & 3;
    __ movq($tmp$$Register, $val$$XMMRegister);
    __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx);
    __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx);
    __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx);
  %}
  ins_pipe( pipe_slow );
%}
#endif

// ====================REDUCTION ARITHMETIC=======================================
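// Reductions combine a scalar input with every lane of a vector and produce a
// scalar result. In the integral rules below src1 is the incoming scalar and
// src2 the vector; the float/double rules reuse dst as the incoming scalar.
// Scalar sketch of what e.g. AddReductionVI computes (illustration only, not
// generated code):
//   int result = src1;
//   for (int i = 0; i < vlen; i++) {
//     result += src2[i];
//   }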

// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Long Reduction==========================================

#ifdef _LP64
instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq());
  match(Set dst (AddReductionVL src1 src2));
  match(Set dst (MulReductionVL src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  match(Set dst (MinReductionV src1 src2));
  match(Set dst (MaxReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src2);
    __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

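// The float/double reduction rules below thread the scalar accumulator through
// dst: the match rules have the form (AddReductionVF dst src), so the incoming
// scalar arrives in dst, dst is also listed as a TEMP, and reduce_fp() folds
// the vector lanes into that accumulator.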
// =======================Float Reduction==========================================

instruct reductionF128(regF dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) <= 4); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 16); // src
  match(Set dst (AddReductionVF dst src));
  match(Set dst (MulReductionVF dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Double Reduction==========================================

instruct reduction2D(regD dst, vec src, vec vtmp) %{
  predicate(Matcher::vector_length(n->in(2)) == 2); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 4); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
  predicate(Matcher::vector_length(n->in(2)) == 8); // src
  match(Set dst (AddReductionVD dst src));
  match(Set dst (MulReductionVD dst src));
  effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = Matcher::vector_length(this, $src);
    __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// =======================Byte Reduction==========================================

#ifdef _LP64
instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
4855 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 4856 match(Set dst (AddReductionVI src1 src2)); 4857 match(Set dst (AndReductionV src1 src2)); 4858 match(Set dst ( OrReductionV src1 src2)); 4859 match(Set dst (XorReductionV src1 src2)); 4860 match(Set dst (MinReductionV src1 src2)); 4861 match(Set dst (MaxReductionV src1 src2)); 4862 effect(TEMP vtmp1, TEMP vtmp2); 4863 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4864 ins_encode %{ 4865 int opcode = this->ideal_Opcode(); 4866 int vlen = Matcher::vector_length(this, $src2); 4867 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4868 %} 4869 ins_pipe( pipe_slow ); 4870 %} 4871 4872 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4873 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 4874 match(Set dst (AddReductionVI src1 src2)); 4875 match(Set dst (AndReductionV src1 src2)); 4876 match(Set dst ( OrReductionV src1 src2)); 4877 match(Set dst (XorReductionV src1 src2)); 4878 match(Set dst (MinReductionV src1 src2)); 4879 match(Set dst (MaxReductionV src1 src2)); 4880 effect(TEMP vtmp1, TEMP vtmp2); 4881 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4882 ins_encode %{ 4883 int opcode = this->ideal_Opcode(); 4884 int vlen = Matcher::vector_length(this, $src2); 4885 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4886 %} 4887 ins_pipe( pipe_slow ); 4888 %} 4889 #endif 4890 4891 // =======================Short Reduction========================================== 4892 4893 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4894 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 4895 match(Set dst (AddReductionVI src1 src2)); 4896 match(Set dst (MulReductionVI src1 src2)); 4897 match(Set dst (AndReductionV src1 src2)); 4898 match(Set dst ( OrReductionV src1 src2)); 4899 match(Set dst (XorReductionV src1 src2)); 4900 match(Set dst (MinReductionV src1 src2)); 4901 match(Set dst (MaxReductionV src1 src2)); 4902 effect(TEMP vtmp1, TEMP vtmp2); 4903 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4904 ins_encode %{ 4905 int opcode = this->ideal_Opcode(); 4906 int vlen = Matcher::vector_length(this, $src2); 4907 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4908 %} 4909 ins_pipe( pipe_slow ); 4910 %} 4911 4912 // =======================Mul Reduction========================================== 4913 4914 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4915 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4916 Matcher::vector_length(n->in(2)) <= 32); // src2 4917 match(Set dst (MulReductionVI src1 src2)); 4918 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4919 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4920 ins_encode %{ 4921 int opcode = this->ideal_Opcode(); 4922 int vlen = Matcher::vector_length(this, $src2); 4923 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4924 %} 4925 ins_pipe( pipe_slow ); 4926 %} 4927 4928 instruct mul_reduction64B(rRegI dst, 
rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4929 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4930 Matcher::vector_length(n->in(2)) == 64); // src2 4931 match(Set dst (MulReductionVI src1 src2)); 4932 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4933 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4934 ins_encode %{ 4935 int opcode = this->ideal_Opcode(); 4936 int vlen = Matcher::vector_length(this, $src2); 4937 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4938 %} 4939 ins_pipe( pipe_slow ); 4940 %} 4941 4942 //--------------------Min/Max Float Reduction -------------------- 4943 // Float Min Reduction 4944 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 4945 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4946 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4947 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4948 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4949 Matcher::vector_length(n->in(2)) == 2); 4950 match(Set dst (MinReductionV src1 src2)); 4951 match(Set dst (MaxReductionV src1 src2)); 4952 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4953 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4954 ins_encode %{ 4955 assert(UseAVX > 0, "sanity"); 4956 4957 int opcode = this->ideal_Opcode(); 4958 int vlen = Matcher::vector_length(this, $src2); 4959 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4960 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4961 %} 4962 ins_pipe( pipe_slow ); 4963 %} 4964 4965 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 4966 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4967 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4968 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4969 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4970 Matcher::vector_length(n->in(2)) >= 4); 4971 match(Set dst (MinReductionV src1 src2)); 4972 match(Set dst (MaxReductionV src1 src2)); 4973 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4974 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4975 ins_encode %{ 4976 assert(UseAVX > 0, "sanity"); 4977 4978 int opcode = this->ideal_Opcode(); 4979 int vlen = Matcher::vector_length(this, $src2); 4980 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4981 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4982 %} 4983 ins_pipe( pipe_slow ); 4984 %} 4985 4986 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 4987 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4988 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4989 Matcher::vector_length(n->in(2)) == 2); 4990 match(Set dst (MinReductionV dst src)); 4991 match(Set dst (MaxReductionV dst src)); 4992 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4993 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4994 ins_encode %{ 4995 assert(UseAVX > 0, 
"sanity"); 4996 4997 int opcode = this->ideal_Opcode(); 4998 int vlen = Matcher::vector_length(this, $src); 4999 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5000 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5001 %} 5002 ins_pipe( pipe_slow ); 5003 %} 5004 5005 5006 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5007 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5008 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5009 Matcher::vector_length(n->in(2)) >= 4); 5010 match(Set dst (MinReductionV dst src)); 5011 match(Set dst (MaxReductionV dst src)); 5012 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5013 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5014 ins_encode %{ 5015 assert(UseAVX > 0, "sanity"); 5016 5017 int opcode = this->ideal_Opcode(); 5018 int vlen = Matcher::vector_length(this, $src); 5019 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5020 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5021 %} 5022 ins_pipe( pipe_slow ); 5023 %} 5024 5025 5026 //--------------------Min Double Reduction -------------------- 5027 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5028 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5029 rFlagsReg cr) %{ 5030 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5031 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5032 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5033 Matcher::vector_length(n->in(2)) == 2); 5034 match(Set dst (MinReductionV src1 src2)); 5035 match(Set dst (MaxReductionV src1 src2)); 5036 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5037 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5038 ins_encode %{ 5039 assert(UseAVX > 0, "sanity"); 5040 5041 int opcode = this->ideal_Opcode(); 5042 int vlen = Matcher::vector_length(this, $src2); 5043 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5044 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5045 %} 5046 ins_pipe( pipe_slow ); 5047 %} 5048 5049 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5050 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5051 rFlagsReg cr) %{ 5052 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5053 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5054 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5055 Matcher::vector_length(n->in(2)) >= 4); 5056 match(Set dst (MinReductionV src1 src2)); 5057 match(Set dst (MaxReductionV src1 src2)); 5058 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5059 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5060 ins_encode %{ 5061 assert(UseAVX > 0, "sanity"); 5062 5063 int opcode = this->ideal_Opcode(); 5064 int vlen = Matcher::vector_length(this, $src2); 5065 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5066 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, 
$tmp5$$XMMRegister); 5067 %} 5068 ins_pipe( pipe_slow ); 5069 %} 5070 5071 5072 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5073 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5074 rFlagsReg cr) %{ 5075 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5076 Matcher::vector_length(n->in(2)) == 2); 5077 match(Set dst (MinReductionV dst src)); 5078 match(Set dst (MaxReductionV dst src)); 5079 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5080 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5081 ins_encode %{ 5082 assert(UseAVX > 0, "sanity"); 5083 5084 int opcode = this->ideal_Opcode(); 5085 int vlen = Matcher::vector_length(this, $src); 5086 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5087 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5088 %} 5089 ins_pipe( pipe_slow ); 5090 %} 5091 5092 instruct minmax_reductionD_av(legRegD dst, legVec src, 5093 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5094 rFlagsReg cr) %{ 5095 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5096 Matcher::vector_length(n->in(2)) >= 4); 5097 match(Set dst (MinReductionV dst src)); 5098 match(Set dst (MaxReductionV dst src)); 5099 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5100 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5101 ins_encode %{ 5102 assert(UseAVX > 0, "sanity"); 5103 5104 int opcode = this->ideal_Opcode(); 5105 int vlen = Matcher::vector_length(this, $src); 5106 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5107 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5108 %} 5109 ins_pipe( pipe_slow ); 5110 %} 5111 5112 // ====================VECTOR ARITHMETIC======================================= 5113 5114 // --------------------------------- ADD -------------------------------------- 5115 5116 // Bytes vector add 5117 instruct vaddB(vec dst, vec src) %{ 5118 predicate(UseAVX == 0); 5119 match(Set dst (AddVB dst src)); 5120 format %{ "paddb $dst,$src\t! add packedB" %} 5121 ins_encode %{ 5122 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5123 %} 5124 ins_pipe( pipe_slow ); 5125 %} 5126 5127 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5128 predicate(UseAVX > 0); 5129 match(Set dst (AddVB src1 src2)); 5130 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5131 ins_encode %{ 5132 int vlen_enc = vector_length_encoding(this); 5133 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5134 %} 5135 ins_pipe( pipe_slow ); 5136 %} 5137 5138 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5139 predicate((UseAVX > 0) && 5140 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5141 match(Set dst (AddVB src (LoadVector mem))); 5142 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5143 ins_encode %{ 5144 int vlen_enc = vector_length_encoding(this); 5145 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5146 %} 5147 ins_pipe( pipe_slow ); 5148 %} 5149 5150 // Shorts/Chars vector add 5151 instruct vaddS(vec dst, vec src) %{ 5152 predicate(UseAVX == 0); 5153 match(Set dst (AddVS dst src)); 5154 format %{ "paddw $dst,$src\t! 
add packedS" %} 5155 ins_encode %{ 5156 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5157 %} 5158 ins_pipe( pipe_slow ); 5159 %} 5160 5161 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5162 predicate(UseAVX > 0); 5163 match(Set dst (AddVS src1 src2)); 5164 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5165 ins_encode %{ 5166 int vlen_enc = vector_length_encoding(this); 5167 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5168 %} 5169 ins_pipe( pipe_slow ); 5170 %} 5171 5172 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5173 predicate((UseAVX > 0) && 5174 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5175 match(Set dst (AddVS src (LoadVector mem))); 5176 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5177 ins_encode %{ 5178 int vlen_enc = vector_length_encoding(this); 5179 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5180 %} 5181 ins_pipe( pipe_slow ); 5182 %} 5183 5184 // Integers vector add 5185 instruct vaddI(vec dst, vec src) %{ 5186 predicate(UseAVX == 0); 5187 match(Set dst (AddVI dst src)); 5188 format %{ "paddd $dst,$src\t! add packedI" %} 5189 ins_encode %{ 5190 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5191 %} 5192 ins_pipe( pipe_slow ); 5193 %} 5194 5195 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5196 predicate(UseAVX > 0); 5197 match(Set dst (AddVI src1 src2)); 5198 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5199 ins_encode %{ 5200 int vlen_enc = vector_length_encoding(this); 5201 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5202 %} 5203 ins_pipe( pipe_slow ); 5204 %} 5205 5206 5207 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5208 predicate((UseAVX > 0) && 5209 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5210 match(Set dst (AddVI src (LoadVector mem))); 5211 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5212 ins_encode %{ 5213 int vlen_enc = vector_length_encoding(this); 5214 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5215 %} 5216 ins_pipe( pipe_slow ); 5217 %} 5218 5219 // Longs vector add 5220 instruct vaddL(vec dst, vec src) %{ 5221 predicate(UseAVX == 0); 5222 match(Set dst (AddVL dst src)); 5223 format %{ "paddq $dst,$src\t! add packedL" %} 5224 ins_encode %{ 5225 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5226 %} 5227 ins_pipe( pipe_slow ); 5228 %} 5229 5230 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5231 predicate(UseAVX > 0); 5232 match(Set dst (AddVL src1 src2)); 5233 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5234 ins_encode %{ 5235 int vlen_enc = vector_length_encoding(this); 5236 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5237 %} 5238 ins_pipe( pipe_slow ); 5239 %} 5240 5241 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5242 predicate((UseAVX > 0) && 5243 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5244 match(Set dst (AddVL src (LoadVector mem))); 5245 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5246 ins_encode %{ 5247 int vlen_enc = vector_length_encoding(this); 5248 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5249 %} 5250 ins_pipe( pipe_slow ); 5251 %} 5252 5253 // Floats vector add 5254 instruct vaddF(vec dst, vec src) %{ 5255 predicate(UseAVX == 0); 5256 match(Set dst (AddVF dst src)); 5257 format %{ "addps $dst,$src\t! 
add packedF" %} 5258 ins_encode %{ 5259 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5260 %} 5261 ins_pipe( pipe_slow ); 5262 %} 5263 5264 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5265 predicate(UseAVX > 0); 5266 match(Set dst (AddVF src1 src2)); 5267 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5268 ins_encode %{ 5269 int vlen_enc = vector_length_encoding(this); 5270 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5271 %} 5272 ins_pipe( pipe_slow ); 5273 %} 5274 5275 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5276 predicate((UseAVX > 0) && 5277 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5278 match(Set dst (AddVF src (LoadVector mem))); 5279 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5280 ins_encode %{ 5281 int vlen_enc = vector_length_encoding(this); 5282 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5283 %} 5284 ins_pipe( pipe_slow ); 5285 %} 5286 5287 // Doubles vector add 5288 instruct vaddD(vec dst, vec src) %{ 5289 predicate(UseAVX == 0); 5290 match(Set dst (AddVD dst src)); 5291 format %{ "addpd $dst,$src\t! add packedD" %} 5292 ins_encode %{ 5293 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5294 %} 5295 ins_pipe( pipe_slow ); 5296 %} 5297 5298 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5299 predicate(UseAVX > 0); 5300 match(Set dst (AddVD src1 src2)); 5301 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5302 ins_encode %{ 5303 int vlen_enc = vector_length_encoding(this); 5304 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5305 %} 5306 ins_pipe( pipe_slow ); 5307 %} 5308 5309 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5310 predicate((UseAVX > 0) && 5311 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5312 match(Set dst (AddVD src (LoadVector mem))); 5313 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5314 ins_encode %{ 5315 int vlen_enc = vector_length_encoding(this); 5316 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5317 %} 5318 ins_pipe( pipe_slow ); 5319 %} 5320 5321 // --------------------------------- SUB -------------------------------------- 5322 5323 // Bytes vector sub 5324 instruct vsubB(vec dst, vec src) %{ 5325 predicate(UseAVX == 0); 5326 match(Set dst (SubVB dst src)); 5327 format %{ "psubb $dst,$src\t! sub packedB" %} 5328 ins_encode %{ 5329 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5330 %} 5331 ins_pipe( pipe_slow ); 5332 %} 5333 5334 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5335 predicate(UseAVX > 0); 5336 match(Set dst (SubVB src1 src2)); 5337 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5338 ins_encode %{ 5339 int vlen_enc = vector_length_encoding(this); 5340 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5341 %} 5342 ins_pipe( pipe_slow ); 5343 %} 5344 5345 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5346 predicate((UseAVX > 0) && 5347 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5348 match(Set dst (SubVB src (LoadVector mem))); 5349 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5350 ins_encode %{ 5351 int vlen_enc = vector_length_encoding(this); 5352 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5353 %} 5354 ins_pipe( pipe_slow ); 5355 %} 5356 5357 // Shorts/Chars vector sub 5358 instruct vsubS(vec dst, vec src) %{ 5359 predicate(UseAVX == 0); 5360 match(Set dst (SubVS dst src)); 5361 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5362 ins_encode %{ 5363 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5364 %} 5365 ins_pipe( pipe_slow ); 5366 %} 5367 5368 5369 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5370 predicate(UseAVX > 0); 5371 match(Set dst (SubVS src1 src2)); 5372 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5373 ins_encode %{ 5374 int vlen_enc = vector_length_encoding(this); 5375 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5376 %} 5377 ins_pipe( pipe_slow ); 5378 %} 5379 5380 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5381 predicate((UseAVX > 0) && 5382 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5383 match(Set dst (SubVS src (LoadVector mem))); 5384 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5385 ins_encode %{ 5386 int vlen_enc = vector_length_encoding(this); 5387 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5388 %} 5389 ins_pipe( pipe_slow ); 5390 %} 5391 5392 // Integers vector sub 5393 instruct vsubI(vec dst, vec src) %{ 5394 predicate(UseAVX == 0); 5395 match(Set dst (SubVI dst src)); 5396 format %{ "psubd $dst,$src\t! sub packedI" %} 5397 ins_encode %{ 5398 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5399 %} 5400 ins_pipe( pipe_slow ); 5401 %} 5402 5403 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5404 predicate(UseAVX > 0); 5405 match(Set dst (SubVI src1 src2)); 5406 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5407 ins_encode %{ 5408 int vlen_enc = vector_length_encoding(this); 5409 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5410 %} 5411 ins_pipe( pipe_slow ); 5412 %} 5413 5414 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5415 predicate((UseAVX > 0) && 5416 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5417 match(Set dst (SubVI src (LoadVector mem))); 5418 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5419 ins_encode %{ 5420 int vlen_enc = vector_length_encoding(this); 5421 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5422 %} 5423 ins_pipe( pipe_slow ); 5424 %} 5425 5426 // Longs vector sub 5427 instruct vsubL(vec dst, vec src) %{ 5428 predicate(UseAVX == 0); 5429 match(Set dst (SubVL dst src)); 5430 format %{ "psubq $dst,$src\t! sub packedL" %} 5431 ins_encode %{ 5432 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5433 %} 5434 ins_pipe( pipe_slow ); 5435 %} 5436 5437 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5438 predicate(UseAVX > 0); 5439 match(Set dst (SubVL src1 src2)); 5440 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5441 ins_encode %{ 5442 int vlen_enc = vector_length_encoding(this); 5443 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5444 %} 5445 ins_pipe( pipe_slow ); 5446 %} 5447 5448 5449 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5450 predicate((UseAVX > 0) && 5451 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5452 match(Set dst (SubVL src (LoadVector mem))); 5453 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5454 ins_encode %{ 5455 int vlen_enc = vector_length_encoding(this); 5456 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5457 %} 5458 ins_pipe( pipe_slow ); 5459 %} 5460 5461 // Floats vector sub 5462 instruct vsubF(vec dst, vec src) %{ 5463 predicate(UseAVX == 0); 5464 match(Set dst (SubVF dst src)); 5465 format %{ "subps $dst,$src\t! 
sub packedF" %} 5466 ins_encode %{ 5467 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5468 %} 5469 ins_pipe( pipe_slow ); 5470 %} 5471 5472 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5473 predicate(UseAVX > 0); 5474 match(Set dst (SubVF src1 src2)); 5475 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5476 ins_encode %{ 5477 int vlen_enc = vector_length_encoding(this); 5478 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5479 %} 5480 ins_pipe( pipe_slow ); 5481 %} 5482 5483 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5484 predicate((UseAVX > 0) && 5485 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5486 match(Set dst (SubVF src (LoadVector mem))); 5487 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5488 ins_encode %{ 5489 int vlen_enc = vector_length_encoding(this); 5490 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5491 %} 5492 ins_pipe( pipe_slow ); 5493 %} 5494 5495 // Doubles vector sub 5496 instruct vsubD(vec dst, vec src) %{ 5497 predicate(UseAVX == 0); 5498 match(Set dst (SubVD dst src)); 5499 format %{ "subpd $dst,$src\t! sub packedD" %} 5500 ins_encode %{ 5501 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5502 %} 5503 ins_pipe( pipe_slow ); 5504 %} 5505 5506 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5507 predicate(UseAVX > 0); 5508 match(Set dst (SubVD src1 src2)); 5509 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5510 ins_encode %{ 5511 int vlen_enc = vector_length_encoding(this); 5512 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5513 %} 5514 ins_pipe( pipe_slow ); 5515 %} 5516 5517 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5518 predicate((UseAVX > 0) && 5519 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5520 match(Set dst (SubVD src (LoadVector mem))); 5521 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 5522 ins_encode %{ 5523 int vlen_enc = vector_length_encoding(this); 5524 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5525 %} 5526 ins_pipe( pipe_slow ); 5527 %} 5528 5529 // --------------------------------- MUL -------------------------------------- 5530 5531 // Byte vector mul 5532 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5533 predicate(Matcher::vector_length(n) == 4 || 5534 Matcher::vector_length(n) == 8); 5535 match(Set dst (MulVB src1 src2)); 5536 effect(TEMP dst, TEMP tmp, TEMP scratch); 5537 format %{"vector_mulB $dst,$src1,$src2" %} 5538 ins_encode %{ 5539 assert(UseSSE > 3, "required"); 5540 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 5541 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 5542 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 5543 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5544 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5545 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5546 %} 5547 ins_pipe( pipe_slow ); 5548 %} 5549 5550 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5551 predicate(Matcher::vector_length(n) == 16 && UseAVX <= 1); 5552 match(Set dst (MulVB src1 src2)); 5553 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5554 format %{"vector_mulB $dst,$src1,$src2" %} 5555 ins_encode %{ 5556 assert(UseSSE > 3, "required"); 5557 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 5558 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 5559 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 5560 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 5561 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 5562 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5563 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 5564 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 5565 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5566 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5567 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5568 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5569 %} 5570 ins_pipe( pipe_slow ); 5571 %} 5572 5573 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5574 predicate(Matcher::vector_length(n) == 16 && UseAVX > 1); 5575 match(Set dst (MulVB src1 src2)); 5576 effect(TEMP dst, TEMP tmp, TEMP scratch); 5577 format %{"vector_mulB $dst,$src1,$src2" %} 5578 ins_encode %{ 5579 int vlen_enc = Assembler::AVX_256bit; 5580 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5581 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5582 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5583 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5584 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5585 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 5586 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 5587 %} 5588 ins_pipe( pipe_slow ); 5589 %} 5590 5591 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5592 predicate(Matcher::vector_length(n) == 32); 5593 match(Set dst (MulVB src1 src2)); 5594 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5595 format %{"vector_mulB $dst,$src1,$src2" %} 5596 ins_encode %{ 5597 assert(UseAVX > 1, "required"); 5598 int vlen_enc = 
Assembler::AVX_256bit; 5599 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5600 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 5601 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5602 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5603 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5604 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5605 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5606 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5607 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5608 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5609 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5610 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5611 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5612 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5613 %} 5614 ins_pipe( pipe_slow ); 5615 %} 5616 5617 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5618 predicate(Matcher::vector_length(n) == 64); 5619 match(Set dst (MulVB src1 src2)); 5620 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5621 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 5622 ins_encode %{ 5623 assert(UseAVX > 2, "required"); 5624 int vlen_enc = Assembler::AVX_512bit; 5625 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5626 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 5627 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5628 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5629 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5630 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5631 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5632 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5633 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5634 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5635 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5636 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5637 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5638 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5639 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5640 %} 5641 ins_pipe( pipe_slow ); 5642 %} 5643 5644 // Shorts/Chars vector mul 5645 instruct vmulS(vec dst, vec src) %{ 5646 predicate(UseAVX == 0); 5647 match(Set dst (MulVS dst src)); 5648 format %{ "pmullw $dst,$src\t! mul packedS" %} 5649 ins_encode %{ 5650 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5651 %} 5652 ins_pipe( pipe_slow ); 5653 %} 5654 5655 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5656 predicate(UseAVX > 0); 5657 match(Set dst (MulVS src1 src2)); 5658 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 5659 ins_encode %{ 5660 int vlen_enc = vector_length_encoding(this); 5661 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5662 %} 5663 ins_pipe( pipe_slow ); 5664 %} 5665 5666 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5667 predicate((UseAVX > 0) && 5668 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5669 match(Set dst (MulVS src (LoadVector mem))); 5670 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5671 ins_encode %{ 5672 int vlen_enc = vector_length_encoding(this); 5673 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5674 %} 5675 ins_pipe( pipe_slow ); 5676 %} 5677 5678 // Integers vector mul 5679 instruct vmulI(vec dst, vec src) %{ 5680 predicate(UseAVX == 0); 5681 match(Set dst (MulVI dst src)); 5682 format %{ "pmulld $dst,$src\t! mul packedI" %} 5683 ins_encode %{ 5684 assert(UseSSE > 3, "required"); 5685 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5686 %} 5687 ins_pipe( pipe_slow ); 5688 %} 5689 5690 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5691 predicate(UseAVX > 0); 5692 match(Set dst (MulVI src1 src2)); 5693 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5694 ins_encode %{ 5695 int vlen_enc = vector_length_encoding(this); 5696 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5697 %} 5698 ins_pipe( pipe_slow ); 5699 %} 5700 5701 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5702 predicate((UseAVX > 0) && 5703 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5704 match(Set dst (MulVI src (LoadVector mem))); 5705 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5706 ins_encode %{ 5707 int vlen_enc = vector_length_encoding(this); 5708 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5709 %} 5710 ins_pipe( pipe_slow ); 5711 %} 5712 5713 // Longs vector mul 5714 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 5715 predicate(VM_Version::supports_avx512dq()); 5716 match(Set dst (MulVL src1 src2)); 5717 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 5718 ins_encode %{ 5719 assert(UseAVX > 2, "required"); 5720 int vlen_enc = vector_length_encoding(this); 5721 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5722 %} 5723 ins_pipe( pipe_slow ); 5724 %} 5725 5726 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 5727 predicate(VM_Version::supports_avx512dq() && 5728 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5729 match(Set dst (MulVL src (LoadVector mem))); 5730 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 5731 ins_encode %{ 5732 assert(UseAVX > 2, "required"); 5733 int vlen_enc = vector_length_encoding(this); 5734 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5735 %} 5736 ins_pipe( pipe_slow ); 5737 %} 5738 5739 instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ 5740 predicate(Matcher::vector_length(n) == 2 && !VM_Version::supports_avx512dq()); 5741 match(Set dst (MulVL dst src2)); 5742 effect(TEMP dst, TEMP tmp); 5743 format %{ "pshufd $tmp,$src2, 177\n\t" 5744 "pmulld $tmp,$dst\n\t" 5745 "phaddd $tmp,$tmp\n\t" 5746 "pmovzxdq $tmp,$tmp\n\t" 5747 "psllq $tmp, 32\n\t" 5748 "pmuludq $dst,$src2\n\t" 5749 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 5750 5751 ins_encode %{ 5752 assert(VM_Version::supports_sse4_1(), "required"); 5753 int vlen_enc = Assembler::AVX_128bit; 5754 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 5755 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 5756 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5757 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 5758 __ psllq($tmp$$XMMRegister, 32); 5759 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 5760 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 5761 %} 5762 ins_pipe( pipe_slow ); 5763 %} 5764 5765 instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ 5766 predicate(Matcher::vector_length(n) == 4 && !VM_Version::supports_avx512dq()); 5767 match(Set dst (MulVL src1 src2)); 5768 effect(TEMP tmp1, TEMP tmp); 5769 format %{ "vpshufd $tmp,$src2\n\t" 5770 "vpmulld $tmp,$src1,$tmp\n\t" 5771 "vphaddd $tmp,$tmp,$tmp\n\t" 5772 "vpmovzxdq $tmp,$tmp\n\t" 5773 "vpsllq $tmp,$tmp\n\t" 5774 "vpmuludq $tmp1,$src1,$src2\n\t" 5775 "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} 5776 ins_encode %{ 5777 int vlen_enc = Assembler::AVX_256bit; 5778 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); 5779 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5780 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 5781 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5782 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5783 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); 5784 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5785 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5786 %} 5787 ins_pipe( pipe_slow ); 5788 %} 5789 5790 // Floats vector mul 5791 instruct vmulF(vec dst, vec src) %{ 5792 predicate(UseAVX == 0); 5793 match(Set dst (MulVF dst src)); 5794 format %{ "mulps $dst,$src\t! mul packedF" %} 5795 ins_encode %{ 5796 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5797 %} 5798 ins_pipe( pipe_slow ); 5799 %} 5800 5801 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5802 predicate(UseAVX > 0); 5803 match(Set dst (MulVF src1 src2)); 5804 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5805 ins_encode %{ 5806 int vlen_enc = vector_length_encoding(this); 5807 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5808 %} 5809 ins_pipe( pipe_slow ); 5810 %} 5811 5812 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5813 predicate((UseAVX > 0) && 5814 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5815 match(Set dst (MulVF src (LoadVector mem))); 5816 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5817 ins_encode %{ 5818 int vlen_enc = vector_length_encoding(this); 5819 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5820 %} 5821 ins_pipe( pipe_slow ); 5822 %} 5823 5824 // Doubles vector mul 5825 instruct vmulD(vec dst, vec src) %{ 5826 predicate(UseAVX == 0); 5827 match(Set dst (MulVD dst src)); 5828 format %{ "mulpd $dst,$src\t! mul packedD" %} 5829 ins_encode %{ 5830 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5831 %} 5832 ins_pipe( pipe_slow ); 5833 %} 5834 5835 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5836 predicate(UseAVX > 0); 5837 match(Set dst (MulVD src1 src2)); 5838 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 5839 ins_encode %{ 5840 int vlen_enc = vector_length_encoding(this); 5841 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5842 %} 5843 ins_pipe( pipe_slow ); 5844 %} 5845 5846 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5847 predicate((UseAVX > 0) && 5848 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5849 match(Set dst (MulVD src (LoadVector mem))); 5850 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5851 ins_encode %{ 5852 int vlen_enc = vector_length_encoding(this); 5853 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5854 %} 5855 ins_pipe( pipe_slow ); 5856 %} 5857 5858 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5859 predicate(Matcher::vector_length(n) == 8); 5860 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 5861 effect(TEMP dst, USE src1, USE src2); 5862 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 5863 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 5864 %} 5865 ins_encode %{ 5866 assert(UseAVX > 0, "required"); 5867 5868 int vlen_enc = Assembler::AVX_256bit; 5869 int cond = (Assembler::Condition)($copnd$$cmpcode); 5870 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5871 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5872 %} 5873 ins_pipe( pipe_slow ); 5874 %} 5875 5876 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5877 predicate(Matcher::vector_length(n) == 4); 5878 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 5879 effect(TEMP dst, USE src1, USE src2); 5880 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 5881 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 5882 %} 5883 ins_encode %{ 5884 assert(UseAVX > 0, "required"); 5885 5886 int vlen_enc = Assembler::AVX_256bit; 5887 int cond = (Assembler::Condition)($copnd$$cmpcode); 5888 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5889 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5890 %} 5891 ins_pipe( pipe_slow ); 5892 %} 5893 5894 // --------------------------------- DIV -------------------------------------- 5895 5896 // Floats vector div 5897 instruct vdivF(vec dst, vec src) %{ 5898 predicate(UseAVX == 0); 5899 match(Set dst (DivVF dst src)); 5900 format %{ "divps $dst,$src\t! div packedF" %} 5901 ins_encode %{ 5902 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5903 %} 5904 ins_pipe( pipe_slow ); 5905 %} 5906 5907 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5908 predicate(UseAVX > 0); 5909 match(Set dst (DivVF src1 src2)); 5910 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5911 ins_encode %{ 5912 int vlen_enc = vector_length_encoding(this); 5913 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5914 %} 5915 ins_pipe( pipe_slow ); 5916 %} 5917 5918 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5919 predicate((UseAVX > 0) && 5920 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5921 match(Set dst (DivVF src (LoadVector mem))); 5922 format %{ "vdivps $dst,$src,$mem\t! 
div packedF" %} 5923 ins_encode %{ 5924 int vlen_enc = vector_length_encoding(this); 5925 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5926 %} 5927 ins_pipe( pipe_slow ); 5928 %} 5929 5930 // Doubles vector div 5931 instruct vdivD(vec dst, vec src) %{ 5932 predicate(UseAVX == 0); 5933 match(Set dst (DivVD dst src)); 5934 format %{ "divpd $dst,$src\t! div packedD" %} 5935 ins_encode %{ 5936 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5937 %} 5938 ins_pipe( pipe_slow ); 5939 %} 5940 5941 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5942 predicate(UseAVX > 0); 5943 match(Set dst (DivVD src1 src2)); 5944 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5945 ins_encode %{ 5946 int vlen_enc = vector_length_encoding(this); 5947 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5948 %} 5949 ins_pipe( pipe_slow ); 5950 %} 5951 5952 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5953 predicate((UseAVX > 0) && 5954 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5955 match(Set dst (DivVD src (LoadVector mem))); 5956 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5957 ins_encode %{ 5958 int vlen_enc = vector_length_encoding(this); 5959 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5960 %} 5961 ins_pipe( pipe_slow ); 5962 %} 5963 5964 // ------------------------------ MinMax --------------------------------------- 5965 5966 // Byte, Short, Int vector Min/Max 5967 instruct minmax_reg_sse(vec dst, vec src) %{ 5968 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5969 UseAVX == 0); 5970 match(Set dst (MinV dst src)); 5971 match(Set dst (MaxV dst src)); 5972 format %{ "vector_minmax $dst,$src\t! " %} 5973 ins_encode %{ 5974 assert(UseSSE >= 4, "required"); 5975 5976 int opcode = this->ideal_Opcode(); 5977 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5978 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 5979 %} 5980 ins_pipe( pipe_slow ); 5981 %} 5982 5983 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 5984 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5985 UseAVX > 0); 5986 match(Set dst (MinV src1 src2)); 5987 match(Set dst (MaxV src1 src2)); 5988 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 5989 ins_encode %{ 5990 int opcode = this->ideal_Opcode(); 5991 int vlen_enc = vector_length_encoding(this); 5992 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5993 5994 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5995 %} 5996 ins_pipe( pipe_slow ); 5997 %} 5998 5999 // Long vector Min/Max 6000 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6001 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6002 UseAVX == 0); 6003 match(Set dst (MinV dst src)); 6004 match(Set dst (MaxV src dst)); 6005 effect(TEMP dst, TEMP tmp); 6006 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6007 ins_encode %{ 6008 assert(UseSSE >= 4, "required"); 6009 6010 int opcode = this->ideal_Opcode(); 6011 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6012 assert(elem_bt == T_LONG, "sanity"); 6013 6014 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6015 %} 6016 ins_pipe( pipe_slow ); 6017 %} 6018 6019 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6020 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6021 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6022 match(Set dst (MinV src1 src2)); 6023 match(Set dst (MaxV src1 src2)); 6024 effect(TEMP dst); 6025 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6026 ins_encode %{ 6027 int vlen_enc = vector_length_encoding(this); 6028 int opcode = this->ideal_Opcode(); 6029 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6030 assert(elem_bt == T_LONG, "sanity"); 6031 6032 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6033 %} 6034 ins_pipe( pipe_slow ); 6035 %} 6036 6037 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6038 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6039 Matcher::vector_element_basic_type(n) == T_LONG); 6040 match(Set dst (MinV src1 src2)); 6041 match(Set dst (MaxV src1 src2)); 6042 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6043 ins_encode %{ 6044 assert(UseAVX > 2, "required"); 6045 6046 int vlen_enc = vector_length_encoding(this); 6047 int opcode = this->ideal_Opcode(); 6048 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6049 assert(elem_bt == T_LONG, "sanity"); 6050 6051 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6052 %} 6053 ins_pipe( pipe_slow ); 6054 %} 6055 6056 // Float/Double vector Min/Max 6057 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6058 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6059 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6060 UseAVX > 0); 6061 match(Set dst (MinV a b)); 6062 match(Set dst (MaxV a b)); 6063 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6064 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6065 ins_encode %{ 6066 assert(UseAVX > 0, "required"); 6067 6068 int opcode = this->ideal_Opcode(); 6069 int vlen_enc = vector_length_encoding(this); 6070 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6071 6072 __ vminmax_fp(opcode, elem_bt, 6073 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6074 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6075 %} 6076 ins_pipe( pipe_slow ); 6077 %} 6078 6079 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6080 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6081 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6082 match(Set dst (MinV a b)); 6083 match(Set dst (MaxV a b)); 6084 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6085 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6086 ins_encode %{ 6087 assert(UseAVX > 2, "required"); 6088 6089 int opcode = this->ideal_Opcode(); 6090 int vlen_enc = vector_length_encoding(this); 6091 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6092 6093 __ evminmax_fp(opcode, elem_bt, 6094 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6095 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6096 %} 6097 ins_pipe( pipe_slow ); 6098 %} 6099 6100 // --------------------------------- Signum/CopySign --------------------------- 6101 6102 instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ 6103 match(Set dst (SignumF dst (Binary zero one))); 6104 effect(TEMP scratch, KILL cr); 6105 format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %} 6106 ins_encode %{ 6107 int opcode = this->ideal_Opcode(); 6108 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 6109 %} 6110 ins_pipe( pipe_slow ); 6111 %} 6112 6113 instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{ 6114 match(Set dst (SignumD dst (Binary zero one))); 6115 effect(TEMP scratch, KILL cr); 6116 format %{ "signumD $dst, $dst\t! 
using $scratch as TEMP" %} 6117 ins_encode %{ 6118 int opcode = this->ideal_Opcode(); 6119 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 6120 %} 6121 ins_pipe( pipe_slow ); 6122 %} 6123 6124 // --------------------------------------- 6125 // For copySign use 0xE4 as writemask for vpternlog 6126 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6127 // C (xmm2) is set to 0x7FFFFFFF 6128 // Wherever xmm2 is 0, we want to pick from B (sign) 6129 // Wherever xmm2 is 1, we want to pick from A (src) 6130 // 6131 // A B C Result 6132 // 0 0 0 0 6133 // 0 0 1 0 6134 // 0 1 0 1 6135 // 0 1 1 0 6136 // 1 0 0 0 6137 // 1 0 1 1 6138 // 1 1 0 1 6139 // 1 1 1 1 6140 // 6141 // Result going from high bit to low bit is 0x11100100 = 0xe4 6142 // --------------------------------------- 6143 6144 #ifdef _LP64 6145 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6146 match(Set dst (CopySignF dst src)); 6147 effect(TEMP tmp1, TEMP tmp2); 6148 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6149 ins_encode %{ 6150 __ movl($tmp2$$Register, 0x7FFFFFFF); 6151 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6152 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6153 %} 6154 ins_pipe( pipe_slow ); 6155 %} 6156 6157 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6158 match(Set dst (CopySignD dst (Binary src zero))); 6159 ins_cost(100); 6160 effect(TEMP tmp1, TEMP tmp2); 6161 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6162 ins_encode %{ 6163 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6164 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6165 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6166 %} 6167 ins_pipe( pipe_slow ); 6168 %} 6169 #endif // _LP64 6170 6171 // --------------------------------- Sqrt -------------------------------------- 6172 6173 instruct vsqrtF_reg(vec dst, vec src) %{ 6174 match(Set dst (SqrtVF src)); 6175 ins_cost(400); 6176 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6177 ins_encode %{ 6178 assert(UseAVX > 0, "required"); 6179 int vlen_enc = vector_length_encoding(this); 6180 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6181 %} 6182 ins_pipe( pipe_slow ); 6183 %} 6184 6185 instruct vsqrtF_mem(vec dst, memory mem) %{ 6186 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6187 match(Set dst (SqrtVF (LoadVector mem))); 6188 ins_cost(400); 6189 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6190 ins_encode %{ 6191 assert(UseAVX > 0, "required"); 6192 int vlen_enc = vector_length_encoding(this); 6193 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6194 %} 6195 ins_pipe( pipe_slow ); 6196 %} 6197 6198 // Floating point vector sqrt 6199 instruct vsqrtD_reg(vec dst, vec src) %{ 6200 match(Set dst (SqrtVD src)); 6201 ins_cost(400); 6202 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6203 ins_encode %{ 6204 assert(UseAVX > 0, "required"); 6205 int vlen_enc = vector_length_encoding(this); 6206 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6207 %} 6208 ins_pipe( pipe_slow ); 6209 %} 6210 6211 instruct vsqrtD_mem(vec dst, memory mem) %{ 6212 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6213 match(Set dst (SqrtVD (LoadVector mem))); 6214 ins_cost(400); 6215 format %{ "vsqrtpd $dst,$mem\t! 
sqrt packedD" %} 6216 ins_encode %{ 6217 assert(UseAVX > 0, "required"); 6218 int vlen_enc = vector_length_encoding(this); 6219 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6220 %} 6221 ins_pipe( pipe_slow ); 6222 %} 6223 6224 // ------------------------------ Shift --------------------------------------- 6225 6226 // Left and right shift count vectors are the same on x86 6227 // (only lowest bits of xmm reg are used for count). 6228 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6229 match(Set dst (LShiftCntV cnt)); 6230 match(Set dst (RShiftCntV cnt)); 6231 format %{ "movdl $dst,$cnt\t! load shift count" %} 6232 ins_encode %{ 6233 __ movdl($dst$$XMMRegister, $cnt$$Register); 6234 %} 6235 ins_pipe( pipe_slow ); 6236 %} 6237 6238 // Byte vector shift 6239 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6240 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6241 match(Set dst ( LShiftVB src shift)); 6242 match(Set dst ( RShiftVB src shift)); 6243 match(Set dst (URShiftVB src shift)); 6244 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 6245 format %{"vector_byte_shift $dst,$src,$shift" %} 6246 ins_encode %{ 6247 assert(UseSSE > 3, "required"); 6248 int opcode = this->ideal_Opcode(); 6249 bool sign = (opcode != Op_URShiftVB); 6250 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6251 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6252 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6253 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6254 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6255 %} 6256 ins_pipe( pipe_slow ); 6257 %} 6258 6259 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6260 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6261 UseAVX <= 1); 6262 match(Set dst ( LShiftVB src shift)); 6263 match(Set dst ( RShiftVB src shift)); 6264 match(Set dst (URShiftVB src shift)); 6265 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 6266 format %{"vector_byte_shift $dst,$src,$shift" %} 6267 ins_encode %{ 6268 assert(UseSSE > 3, "required"); 6269 int opcode = this->ideal_Opcode(); 6270 bool sign = (opcode != Op_URShiftVB); 6271 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6272 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6273 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6274 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6275 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6276 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6277 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6278 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6279 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6280 %} 6281 ins_pipe( pipe_slow ); 6282 %} 6283 6284 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6285 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6286 UseAVX > 1); 6287 match(Set dst ( LShiftVB src shift)); 6288 match(Set dst ( RShiftVB src shift)); 6289 match(Set dst (URShiftVB src shift)); 6290 effect(TEMP dst, TEMP tmp, TEMP scratch); 6291 format %{"vector_byte_shift $dst,$src,$shift" %} 6292 ins_encode %{ 6293 int opcode = this->ideal_Opcode(); 6294 bool sign = (opcode != Op_URShiftVB); 6295 int vlen_enc = Assembler::AVX_256bit; 6296 __ vextendbw(sign, $tmp$$XMMRegister, 
$src$$XMMRegister, vlen_enc); 6297 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6298 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6299 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6300 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6301 %} 6302 ins_pipe( pipe_slow ); 6303 %} 6304 6305 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6306 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6307 match(Set dst ( LShiftVB src shift)); 6308 match(Set dst ( RShiftVB src shift)); 6309 match(Set dst (URShiftVB src shift)); 6310 effect(TEMP dst, TEMP tmp, TEMP scratch); 6311 format %{"vector_byte_shift $dst,$src,$shift" %} 6312 ins_encode %{ 6313 assert(UseAVX > 1, "required"); 6314 int opcode = this->ideal_Opcode(); 6315 bool sign = (opcode != Op_URShiftVB); 6316 int vlen_enc = Assembler::AVX_256bit; 6317 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6318 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6319 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6320 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6321 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6322 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6323 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6324 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6325 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6326 %} 6327 ins_pipe( pipe_slow ); 6328 %} 6329 6330 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6331 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6332 match(Set dst ( LShiftVB src shift)); 6333 match(Set dst (RShiftVB src shift)); 6334 match(Set dst (URShiftVB src shift)); 6335 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 6336 format %{"vector_byte_shift $dst,$src,$shift" %} 6337 ins_encode %{ 6338 assert(UseAVX > 2, "required"); 6339 int opcode = this->ideal_Opcode(); 6340 bool sign = (opcode != Op_URShiftVB); 6341 int vlen_enc = Assembler::AVX_512bit; 6342 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6343 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6344 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6345 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6346 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6347 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6348 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6349 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6350 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6351 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6352 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 6353 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6354 %} 6355 ins_pipe( pipe_slow ); 
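  // Note on the byte-shift rules above: SSE/AVX provide no byte-granular shift
  // instructions, so these rules widen bytes to words, shift as words, mask the
  // results back into byte range (vector_short_to_byte_mask) and re-pack with
  // packuswb. The 256-/512-bit variants additionally permute quadwords afterwards
  // (vpermq / vector_byte_perm_mask) to restore element order, since packing only
  // operates within 128-bit lanes.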
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into ints with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert (vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! 
shift packedI" %} 6427 ins_encode %{ 6428 int opcode = this->ideal_Opcode(); 6429 if (UseAVX > 0) { 6430 int vector_len = vector_length_encoding(this); 6431 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6432 } else { 6433 int vlen = Matcher::vector_length(this); 6434 if (vlen == 2) { 6435 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6436 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6437 } else { 6438 assert(vlen == 4, "sanity"); 6439 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6440 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6441 } 6442 } 6443 %} 6444 ins_pipe( pipe_slow ); 6445 %} 6446 6447 // Longs vector shift 6448 instruct vshiftL(vec dst, vec src, vec shift) %{ 6449 predicate(!n->as_ShiftV()->is_var_shift()); 6450 match(Set dst ( LShiftVL src shift)); 6451 match(Set dst (URShiftVL src shift)); 6452 effect(TEMP dst, USE src, USE shift); 6453 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6454 ins_encode %{ 6455 int opcode = this->ideal_Opcode(); 6456 if (UseAVX > 0) { 6457 int vlen_enc = vector_length_encoding(this); 6458 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6459 } else { 6460 assert(Matcher::vector_length(this) == 2, ""); 6461 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6462 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6463 } 6464 %} 6465 ins_pipe( pipe_slow ); 6466 %} 6467 6468 // Longs vector constant shift 6469 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6470 match(Set dst (LShiftVL src (LShiftCntV shift))); 6471 match(Set dst (URShiftVL src (RShiftCntV shift))); 6472 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6473 ins_encode %{ 6474 int opcode = this->ideal_Opcode(); 6475 if (UseAVX > 0) { 6476 int vector_len = vector_length_encoding(this); 6477 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6478 } else { 6479 assert(Matcher::vector_length(this) == 2, ""); 6480 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6481 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6482 } 6483 %} 6484 ins_pipe( pipe_slow ); 6485 %} 6486 6487 // -------------------ArithmeticRightShift ----------------------------------- 6488 // Long vector arithmetic right shift 6489 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6490 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6491 match(Set dst (RShiftVL src shift)); 6492 effect(TEMP dst, TEMP tmp, TEMP scratch); 6493 format %{ "vshiftq $dst,$src,$shift" %} 6494 ins_encode %{ 6495 uint vlen = Matcher::vector_length(this); 6496 if (vlen == 2) { 6497 assert(UseSSE >= 2, "required"); 6498 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6499 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6500 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6501 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6502 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6503 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6504 } else { 6505 assert(vlen == 4, "sanity"); 6506 assert(UseAVX > 1, "required"); 6507 int vlen_enc = Assembler::AVX_256bit; 6508 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6509 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6510 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6511 __ vpxor($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6512 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6513 } 6514 %} 6515 ins_pipe( pipe_slow ); 6516 %} 6517 6518 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6519 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6520 match(Set dst (RShiftVL src shift)); 6521 format %{ "vshiftq $dst,$src,$shift" %} 6522 ins_encode %{ 6523 int vlen_enc = vector_length_encoding(this); 6524 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6525 %} 6526 ins_pipe( pipe_slow ); 6527 %} 6528 6529 // ------------------- Variable Shift ----------------------------- 6530 // Byte variable shift 6531 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6532 predicate(Matcher::vector_length(n) <= 8 && 6533 n->as_ShiftV()->is_var_shift() && 6534 !VM_Version::supports_avx512bw()); 6535 match(Set dst ( LShiftVB src shift)); 6536 match(Set dst ( RShiftVB src shift)); 6537 match(Set dst (URShiftVB src shift)); 6538 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6539 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} 6540 ins_encode %{ 6541 assert(UseAVX >= 2, "required"); 6542 6543 int opcode = this->ideal_Opcode(); 6544 int vlen_enc = Assembler::AVX_128bit; 6545 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6546 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6547 %} 6548 ins_pipe( pipe_slow ); 6549 %} 6550 6551 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6552 predicate(Matcher::vector_length(n) == 16 && 6553 n->as_ShiftV()->is_var_shift() && 6554 !VM_Version::supports_avx512bw()); 6555 match(Set dst ( LShiftVB src shift)); 6556 match(Set dst ( RShiftVB src shift)); 6557 match(Set dst (URShiftVB src shift)); 6558 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6559 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6560 ins_encode %{ 6561 assert(UseAVX >= 2, "required"); 6562 6563 int opcode = this->ideal_Opcode(); 6564 int vlen_enc = Assembler::AVX_128bit; 6565 // Shift lower half and get word result in dst 6566 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6567 6568 // Shift upper half and get word result in vtmp1 6569 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6570 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6571 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6572 6573 // Merge and down convert the two word results to byte in dst 6574 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6575 %} 6576 ins_pipe( pipe_slow ); 6577 %} 6578 6579 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ 6580 predicate(Matcher::vector_length(n) == 32 && 6581 n->as_ShiftV()->is_var_shift() && 6582 !VM_Version::supports_avx512bw()); 6583 match(Set dst ( LShiftVB src shift)); 6584 match(Set dst ( RShiftVB src shift)); 6585 match(Set dst (URShiftVB src shift)); 6586 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); 6587 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} 6588 ins_encode %{ 6589 assert(UseAVX >= 2, "required"); 6590 6591 int opcode = this->ideal_Opcode(); 6592 int vlen_enc = Assembler::AVX_128bit; 6593 // Process lower 128 bits and get result in dst 6594 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6595 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6596 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6597 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6598 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6599 6600 // Process higher 128 bits and get result in vtmp3 6601 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6602 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6603 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); 6604 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6605 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6606 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6607 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6608 6609 // Merge the two results in dst 6610 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6611 %} 6612 ins_pipe( pipe_slow ); 6613 %} 6614 6615 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6616 predicate(Matcher::vector_length(n) <= 32 && 6617 n->as_ShiftV()->is_var_shift() && 6618 VM_Version::supports_avx512bw()); 6619 match(Set dst ( LShiftVB src shift)); 6620 match(Set dst ( RShiftVB src shift)); 6621 match(Set dst (URShiftVB src shift)); 6622 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6623 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp, $scratch as TEMP" %} 6624 ins_encode %{ 6625 assert(UseAVX > 2, "required"); 6626 6627 int opcode = this->ideal_Opcode(); 6628 int vlen_enc = vector_length_encoding(this); 6629 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6630 %} 6631 ins_pipe( pipe_slow ); 6632 %} 6633 6634 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6635 predicate(Matcher::vector_length(n) == 64 && 6636 n->as_ShiftV()->is_var_shift() && 6637 VM_Version::supports_avx512bw()); 6638 match(Set dst ( LShiftVB src shift)); 6639 match(Set dst ( RShiftVB src shift)); 6640 match(Set dst (URShiftVB src shift)); 6641 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6642 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6643 ins_encode %{ 6644 assert(UseAVX > 2, "required"); 6645 6646 int opcode = this->ideal_Opcode(); 6647 int vlen_enc = Assembler::AVX_256bit; 6648 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6649 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6650 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6651 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6652 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6653 %} 6654 ins_pipe( pipe_slow ); 6655 %} 6656 6657 // Short variable shift 6658 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6659 predicate(Matcher::vector_length(n) <= 8 && 6660 n->as_ShiftV()->is_var_shift() && 6661 !VM_Version::supports_avx512bw()); 6662 match(Set dst ( LShiftVS src shift)); 6663 match(Set dst ( RShiftVS src shift)); 6664 match(Set dst (URShiftVS src shift)); 6665 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6666 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6667 ins_encode %{ 6668 assert(UseAVX >= 2, "required"); 6669 6670 int opcode = this->ideal_Opcode(); 6671 bool sign = (opcode != Op_URShiftVS); 6672 int vlen_enc = Assembler::AVX_256bit; 6673 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6674 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6675 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6676 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6677 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6678 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6679 %} 6680 ins_pipe( pipe_slow ); 6681 %} 6682 6683 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6684 predicate(Matcher::vector_length(n) == 16 && 6685 n->as_ShiftV()->is_var_shift() && 6686 !VM_Version::supports_avx512bw()); 6687 match(Set dst ( LShiftVS src shift)); 6688 match(Set dst ( RShiftVS src shift)); 6689 match(Set dst (URShiftVS src shift)); 6690 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6691 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6692 ins_encode %{ 6693 assert(UseAVX >= 2, "required"); 6694 6695 int opcode = this->ideal_Opcode(); 6696 bool sign = (opcode != Op_URShiftVS); 6697 int vlen_enc = Assembler::AVX_256bit; 6698 // Shift lower half, with result in vtmp2 
    // using vtmp1 as TEMP
    __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Shift upper half, with result in dst using vtmp1 as TEMP
    __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister);
    __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister);
    __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register);

    // Merge lower and upper half result into dst
    __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift() &&
            VM_Version::supports_avx512bw());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  format %{ "vector_varshift_short $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX > 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      vlen_enc = Assembler::AVX_512bit;
    }
    __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Integer variable shift
instruct vshiftI_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  format %{ "vector_varshift_int $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable shift
instruct vshiftL_var(vec dst, vec src, vec shift) %{
  predicate(n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  format %{ "vector_varshift_long $dst,$src,$shift\t!" %}
  ins_encode %{
    assert(UseAVX >= 2, "required");

    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this);
    __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Long variable right shift arithmetic
instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{
  predicate(Matcher::vector_length(n) <= 4 &&
            n->as_ShiftV()->is_var_shift() &&
            UseAVX == 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP vtmp);
  format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 6780 ins_encode %{ 6781 int opcode = this->ideal_Opcode(); 6782 int vlen_enc = vector_length_encoding(this); 6783 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6784 $vtmp$$XMMRegister); 6785 %} 6786 ins_pipe( pipe_slow ); 6787 %} 6788 6789 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6790 predicate(n->as_ShiftV()->is_var_shift() && 6791 UseAVX > 2); 6792 match(Set dst (RShiftVL src shift)); 6793 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6794 ins_encode %{ 6795 int opcode = this->ideal_Opcode(); 6796 int vlen_enc = vector_length_encoding(this); 6797 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6798 %} 6799 ins_pipe( pipe_slow ); 6800 %} 6801 6802 // --------------------------------- AND -------------------------------------- 6803 6804 instruct vand(vec dst, vec src) %{ 6805 predicate(UseAVX == 0); 6806 match(Set dst (AndV dst src)); 6807 format %{ "pand $dst,$src\t! and vectors" %} 6808 ins_encode %{ 6809 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6810 %} 6811 ins_pipe( pipe_slow ); 6812 %} 6813 6814 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6815 predicate(UseAVX > 0); 6816 match(Set dst (AndV src1 src2)); 6817 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6818 ins_encode %{ 6819 int vlen_enc = vector_length_encoding(this); 6820 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6821 %} 6822 ins_pipe( pipe_slow ); 6823 %} 6824 6825 instruct vand_mem(vec dst, vec src, memory mem) %{ 6826 predicate((UseAVX > 0) && 6827 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6828 match(Set dst (AndV src (LoadVector mem))); 6829 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6830 ins_encode %{ 6831 int vlen_enc = vector_length_encoding(this); 6832 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6833 %} 6834 ins_pipe( pipe_slow ); 6835 %} 6836 6837 // --------------------------------- OR --------------------------------------- 6838 6839 instruct vor(vec dst, vec src) %{ 6840 predicate(UseAVX == 0); 6841 match(Set dst (OrV dst src)); 6842 format %{ "por $dst,$src\t! or vectors" %} 6843 ins_encode %{ 6844 __ por($dst$$XMMRegister, $src$$XMMRegister); 6845 %} 6846 ins_pipe( pipe_slow ); 6847 %} 6848 6849 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6850 predicate(UseAVX > 0); 6851 match(Set dst (OrV src1 src2)); 6852 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6853 ins_encode %{ 6854 int vlen_enc = vector_length_encoding(this); 6855 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6856 %} 6857 ins_pipe( pipe_slow ); 6858 %} 6859 6860 instruct vor_mem(vec dst, vec src, memory mem) %{ 6861 predicate((UseAVX > 0) && 6862 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6863 match(Set dst (OrV src (LoadVector mem))); 6864 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6865 ins_encode %{ 6866 int vlen_enc = vector_length_encoding(this); 6867 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6868 %} 6869 ins_pipe( pipe_slow ); 6870 %} 6871 6872 // --------------------------------- XOR -------------------------------------- 6873 6874 instruct vxor(vec dst, vec src) %{ 6875 predicate(UseAVX == 0); 6876 match(Set dst (XorV dst src)); 6877 format %{ "pxor $dst,$src\t! 
xor vectors" %} 6878 ins_encode %{ 6879 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6880 %} 6881 ins_pipe( pipe_slow ); 6882 %} 6883 6884 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6885 predicate(UseAVX > 0); 6886 match(Set dst (XorV src1 src2)); 6887 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6888 ins_encode %{ 6889 int vlen_enc = vector_length_encoding(this); 6890 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6891 %} 6892 ins_pipe( pipe_slow ); 6893 %} 6894 6895 instruct vxor_mem(vec dst, vec src, memory mem) %{ 6896 predicate((UseAVX > 0) && 6897 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6898 match(Set dst (XorV src (LoadVector mem))); 6899 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 6900 ins_encode %{ 6901 int vlen_enc = vector_length_encoding(this); 6902 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6903 %} 6904 ins_pipe( pipe_slow ); 6905 %} 6906 6907 // --------------------------------- VectorCast -------------------------------------- 6908 6909 instruct vcastBtoX(vec dst, vec src) %{ 6910 match(Set dst (VectorCastB2X src)); 6911 format %{ "vector_cast_b2x $dst,$src\t!" %} 6912 ins_encode %{ 6913 assert(UseAVX > 0, "required"); 6914 6915 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6916 int vlen_enc = vector_length_encoding(this); 6917 switch (to_elem_bt) { 6918 case T_SHORT: 6919 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6920 break; 6921 case T_INT: 6922 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6923 break; 6924 case T_FLOAT: 6925 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6926 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6927 break; 6928 case T_LONG: 6929 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6930 break; 6931 case T_DOUBLE: { 6932 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 6933 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 6934 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6935 break; 6936 } 6937 default: assert(false, "%s", type2name(to_elem_bt)); 6938 } 6939 %} 6940 ins_pipe( pipe_slow ); 6941 %} 6942 6943 instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6944 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6945 Matcher::vector_length(n->in(1)) <= 8 && // src 6946 Matcher::vector_element_basic_type(n) == T_BYTE); 6947 effect(TEMP scratch); 6948 match(Set dst (VectorCastS2X src)); 6949 format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} 6950 ins_encode %{ 6951 assert(UseAVX > 0, "required"); 6952 6953 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6954 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6955 %} 6956 ins_pipe( pipe_slow ); 6957 %} 6958 6959 instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6960 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6961 Matcher::vector_length(n->in(1)) == 16 && // src 6962 Matcher::vector_element_basic_type(n) == T_BYTE); 6963 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6964 match(Set dst (VectorCastS2X src)); 6965 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp, $scratch as TEMP" %} 6966 ins_encode %{ 6967 assert(UseAVX > 0, "required"); 6968 6969 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 6970 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6971 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 6972 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6973 %} 6974 ins_pipe( pipe_slow ); 6975 %} 6976 6977 instruct vcastStoX_evex(vec dst, vec src) %{ 6978 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 6979 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 6980 match(Set dst (VectorCastS2X src)); 6981 format %{ "vector_cast_s2x $dst,$src\t!" %} 6982 ins_encode %{ 6983 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6984 int src_vlen_enc = vector_length_encoding(this, $src); 6985 int vlen_enc = vector_length_encoding(this); 6986 switch (to_elem_bt) { 6987 case T_BYTE: 6988 if (!VM_Version::supports_avx512vl()) { 6989 vlen_enc = Assembler::AVX_512bit; 6990 } 6991 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6992 break; 6993 case T_INT: 6994 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6995 break; 6996 case T_FLOAT: 6997 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6998 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6999 break; 7000 case T_LONG: 7001 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7002 break; 7003 case T_DOUBLE: { 7004 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7005 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7006 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7007 break; 7008 } 7009 default: 7010 ShouldNotReachHere(); 7011 } 7012 %} 7013 ins_pipe( pipe_slow ); 7014 %} 7015 7016 instruct castItoX(vec dst, vec src, rRegP scratch) %{ 7017 predicate(UseAVX <= 2 && 7018 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7019 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7020 match(Set dst (VectorCastI2X src)); 7021 format %{ "vector_cast_i2x $dst,$src\t! 
using $scratch as TEMP" %} 7022 effect(TEMP scratch); 7023 ins_encode %{ 7024 assert(UseAVX > 0, "required"); 7025 7026 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7027 int vlen_enc = vector_length_encoding(this, $src); 7028 7029 if (to_elem_bt == T_BYTE) { 7030 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 7031 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7032 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7033 } else { 7034 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7035 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7036 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7037 } 7038 %} 7039 ins_pipe( pipe_slow ); 7040 %} 7041 7042 instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 7043 predicate(UseAVX <= 2 && 7044 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7045 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7046 match(Set dst (VectorCastI2X src)); 7047 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} 7048 effect(TEMP dst, TEMP vtmp, TEMP scratch); 7049 ins_encode %{ 7050 assert(UseAVX > 0, "required"); 7051 7052 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7053 int vlen_enc = vector_length_encoding(this, $src); 7054 7055 if (to_elem_bt == T_BYTE) { 7056 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 7057 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7058 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7059 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7060 } else { 7061 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7062 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7063 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7064 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7065 } 7066 %} 7067 ins_pipe( pipe_slow ); 7068 %} 7069 7070 instruct vcastItoX_evex(vec dst, vec src) %{ 7071 predicate(UseAVX > 2 || 7072 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7073 match(Set dst (VectorCastI2X src)); 7074 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7075 ins_encode %{ 7076 assert(UseAVX > 0, "required"); 7077 7078 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7079 int src_vlen_enc = vector_length_encoding(this, $src); 7080 int dst_vlen_enc = vector_length_encoding(this); 7081 switch (dst_elem_bt) { 7082 case T_BYTE: 7083 if (!VM_Version::supports_avx512vl()) { 7084 src_vlen_enc = Assembler::AVX_512bit; 7085 } 7086 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7087 break; 7088 case T_SHORT: 7089 if (!VM_Version::supports_avx512vl()) { 7090 src_vlen_enc = Assembler::AVX_512bit; 7091 } 7092 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7093 break; 7094 case T_FLOAT: 7095 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7096 break; 7097 case T_LONG: 7098 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7099 break; 7100 case T_DOUBLE: 7101 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7102 break; 7103 default: 7104 ShouldNotReachHere(); 7105 } 7106 %} 7107 ins_pipe( pipe_slow ); 7108 %} 7109 7110 instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 7111 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7112 UseAVX <= 2); 7113 match(Set dst (VectorCastL2X src)); 7114 effect(TEMP scratch); 7115 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 7116 ins_encode %{ 7117 assert(UseAVX > 0, "required"); 7118 7119 int vlen = Matcher::vector_length_in_bytes(this, $src); 7120 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7121 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7122 : ExternalAddress(vector_int_to_short_mask()); 7123 if (vlen <= 16) { 7124 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7125 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7126 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7127 } else { 7128 assert(vlen <= 32, "required"); 7129 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7130 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7131 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7132 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7133 } 7134 if (to_elem_bt == T_BYTE) { 7135 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7136 } 7137 %} 7138 ins_pipe( pipe_slow ); 7139 %} 7140 7141 instruct vcastLtoX_evex(vec dst, vec src) %{ 7142 predicate(UseAVX > 2 || 7143 (Matcher::vector_element_basic_type(n) == T_INT || 7144 Matcher::vector_element_basic_type(n) == T_FLOAT || 7145 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7146 match(Set dst (VectorCastL2X src)); 7147 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7148 ins_encode %{ 7149 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7150 int vlen = Matcher::vector_length_in_bytes(this, $src); 7151 int vlen_enc = vector_length_encoding(this, $src); 7152 switch (to_elem_bt) { 7153 case T_BYTE: 7154 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7155 vlen_enc = Assembler::AVX_512bit; 7156 } 7157 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7158 break; 7159 case T_SHORT: 7160 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7161 vlen_enc = Assembler::AVX_512bit; 7162 } 7163 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7164 break; 7165 case T_INT: 7166 if (vlen == 8) { 7167 if ($dst$$XMMRegister != $src$$XMMRegister) { 7168 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7169 } 7170 } else if (vlen == 16) { 7171 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7172 } else if (vlen == 32) { 7173 if (UseAVX > 2) { 7174 if (!VM_Version::supports_avx512vl()) { 7175 vlen_enc = Assembler::AVX_512bit; 7176 } 7177 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7178 } else { 7179 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7180 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7181 } 7182 } else { // vlen == 64 7183 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7184 } 7185 break; 7186 case T_FLOAT: 7187 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7188 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7189 break; 7190 case T_DOUBLE: 7191 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7192 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7193 break; 7194 7195 default: assert(false, "%s", type2name(to_elem_bt)); 7196 } 7197 %} 7198 ins_pipe( pipe_slow ); 7199 %} 7200 7201 instruct vcastFtoD_reg(vec dst, vec src) %{ 7202 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7203 match(Set dst (VectorCastF2X src)); 7204 format %{ "vector_cast_f2d $dst,$src\t!" %} 7205 ins_encode %{ 7206 int vlen_enc = vector_length_encoding(this); 7207 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7208 %} 7209 ins_pipe( pipe_slow ); 7210 %} 7211 7212 instruct vcastFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ 7213 predicate(!VM_Version::supports_avx512vl() && 7214 Matcher::vector_length_in_bytes(n) < 64 && 7215 Matcher::vector_element_basic_type(n) == T_INT); 7216 match(Set dst (VectorCastF2X src)); 7217 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); 7218 format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7219 ins_encode %{ 7220 int vlen_enc = vector_length_encoding(this); 7221 __ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7222 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7223 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7224 %} 7225 ins_pipe( pipe_slow ); 7226 %} 7227 7228 instruct vcastFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7229 predicate((VM_Version::supports_avx512vl() || 7230 Matcher::vector_length_in_bytes(n) == 64) && 7231 Matcher::vector_element_basic_type(n) == T_INT); 7232 match(Set dst (VectorCastF2X src)); 7233 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7234 format %{ "vector_cast_f2i $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7235 ins_encode %{ 7236 int vlen_enc = vector_length_encoding(this); 7237 __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7238 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7239 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7240 %} 7241 ins_pipe( pipe_slow ); 7242 %} 7243 7244 instruct vcastDtoF_reg(vec dst, vec src) %{ 7245 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7246 match(Set dst (VectorCastD2X src)); 7247 format %{ "vector_cast_d2x $dst,$src\t!" %} 7248 ins_encode %{ 7249 int vlen_enc = vector_length_encoding(this, $src); 7250 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7251 %} 7252 ins_pipe( pipe_slow ); 7253 %} 7254 7255 instruct vcastDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7256 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7257 match(Set dst (VectorCastD2X src)); 7258 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7259 format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7260 ins_encode %{ 7261 int vlen_enc = vector_length_encoding(this); 7262 __ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7263 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7264 ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc); 7265 %} 7266 ins_pipe( pipe_slow ); 7267 %} 7268 7269 instruct vucast(vec dst, vec src) %{ 7270 match(Set dst (VectorUCastB2X src)); 7271 match(Set dst (VectorUCastS2X src)); 7272 match(Set dst (VectorUCastI2X src)); 7273 format %{ "vector_ucast $dst,$src\t!" %} 7274 ins_encode %{ 7275 assert(UseAVX > 0, "required"); 7276 7277 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7278 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7279 int vlen_enc = vector_length_encoding(this); 7280 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7281 %} 7282 ins_pipe( pipe_slow ); 7283 %} 7284 7285 // --------------------------------- VectorMaskCmp -------------------------------------- 7286 7287 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7288 predicate(n->bottom_type()->isa_vectmask() == NULL && 7289 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7290 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7291 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7292 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7293 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7294 ins_encode %{ 7295 int vlen_enc = vector_length_encoding(this, $src1); 7296 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7297 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7298 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7299 } else { 7300 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7301 } 7302 %} 7303 ins_pipe( pipe_slow ); 7304 %} 7305 7306 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7307 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7308 n->bottom_type()->isa_vectmask() == NULL && 7309 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7310 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7311 effect(TEMP scratch, TEMP ktmp); 7312 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7313 ins_encode %{ 7314 int vlen_enc = Assembler::AVX_512bit; 7315 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7316 KRegister mask = k0; // The comparison itself is not being masked. 7317 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7318 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7319 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7320 } else { 7321 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7322 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7323 } 7324 %} 7325 ins_pipe( pipe_slow ); 7326 %} 7327 7328 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7329 predicate(n->bottom_type()->isa_vectmask() && 7330 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7331 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7332 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7333 ins_encode %{ 7334 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7335 int vlen_enc = vector_length_encoding(this, $src1); 7336 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7337 KRegister mask = k0; // The comparison itself is not being masked. 7338 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7339 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7340 } else { 7341 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7342 } 7343 %} 7344 ins_pipe( pipe_slow ); 7345 %} 7346 7347 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7348 predicate(n->bottom_type()->isa_vectmask() == NULL && 7349 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7350 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7351 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7352 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7353 (n->in(2)->get_int() == BoolTest::eq || 7354 n->in(2)->get_int() == BoolTest::lt || 7355 n->in(2)->get_int() == BoolTest::gt)); // cond 7356 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7357 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7358 ins_encode %{ 7359 int vlen_enc = vector_length_encoding(this, $src1); 7360 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7361 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7362 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7363 %} 7364 ins_pipe( pipe_slow ); 7365 %} 7366 7367 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7368 predicate(n->bottom_type()->isa_vectmask() == NULL && 7369 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7370 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7371 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7372 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7373 (n->in(2)->get_int() == BoolTest::ne || 7374 n->in(2)->get_int() == BoolTest::le || 7375 n->in(2)->get_int() == BoolTest::ge)); // cond 7376 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7377 effect(TEMP dst, TEMP xtmp); 7378 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7379 ins_encode %{ 7380 int vlen_enc = vector_length_encoding(this, $src1); 7381 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7382 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7383 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7384 %} 7385 ins_pipe( pipe_slow ); 7386 %} 7387 7388 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7389 predicate(n->bottom_type()->isa_vectmask() == NULL && 7390 is_unsigned_booltest_pred(n->in(2)->get_int()) && 7391 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7392 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7393 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7394 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7395 effect(TEMP dst, TEMP xtmp); 7396 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
using $xtmp as TEMP" %} 7397 ins_encode %{ 7398 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7399 int vlen_enc = vector_length_encoding(this, $src1); 7400 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7401 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7402 7403 if (vlen_enc == Assembler::AVX_128bit) { 7404 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7405 } else { 7406 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7407 } 7408 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7409 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7410 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7411 %} 7412 ins_pipe( pipe_slow ); 7413 %} 7414 7415 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7416 predicate((n->bottom_type()->isa_vectmask() == NULL && 7417 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7418 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7419 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7420 effect(TEMP scratch, TEMP ktmp); 7421 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7422 ins_encode %{ 7423 assert(UseAVX > 2, "required"); 7424 7425 int vlen_enc = vector_length_encoding(this, $src1); 7426 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7427 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 7428 KRegister mask = k0; // The comparison itself is not being masked. 7429 bool merge = false; 7430 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7431 7432 switch (src1_elem_bt) { 7433 case T_INT: { 7434 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7435 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7436 break; 7437 } 7438 case T_LONG: { 7439 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7440 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7441 break; 7442 } 7443 default: assert(false, "%s", type2name(src1_elem_bt)); 7444 } 7445 %} 7446 ins_pipe( pipe_slow ); 7447 %} 7448 7449 7450 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7451 predicate(n->bottom_type()->isa_vectmask() && 7452 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7453 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7454 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
%}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // The comparison itself is not masked (k0); the result mask is produced directly in $dst.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8); // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 7547 ins_encode %{ 7548 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7549 7550 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7551 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7552 %} 7553 ins_pipe( pipe_slow ); 7554 %} 7555 #endif 7556 7557 instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7558 predicate(Matcher::vector_length(n->in(1)) <= 4); 7559 match(Set dst (ExtractF src idx)); 7560 effect(TEMP dst, TEMP tmp, TEMP vtmp); 7561 format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7562 ins_encode %{ 7563 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7564 7565 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 7566 %} 7567 ins_pipe( pipe_slow ); 7568 %} 7569 7570 instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7571 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7572 Matcher::vector_length(n->in(1)/*src*/) == 16); 7573 match(Set dst (ExtractF src idx)); 7574 effect(TEMP tmp, TEMP vtmp); 7575 format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7576 ins_encode %{ 7577 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7578 7579 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7580 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 7581 %} 7582 ins_pipe( pipe_slow ); 7583 %} 7584 7585 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7586 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7587 match(Set dst (ExtractD src idx)); 7588 format %{ "extractD $dst,$src,$idx\t!" %} 7589 ins_encode %{ 7590 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7591 7592 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7593 %} 7594 ins_pipe( pipe_slow ); 7595 %} 7596 7597 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7598 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7599 Matcher::vector_length(n->in(1)) == 8); // src 7600 match(Set dst (ExtractD src idx)); 7601 effect(TEMP vtmp); 7602 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7603 ins_encode %{ 7604 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7605 7606 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7607 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7608 %} 7609 ins_pipe( pipe_slow ); 7610 %} 7611 7612 // --------------------------------- Vector Blend -------------------------------------- 7613 7614 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7615 predicate(UseAVX == 0); 7616 match(Set dst (VectorBlend (Binary dst src) mask)); 7617 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7618 effect(TEMP tmp); 7619 ins_encode %{ 7620 assert(UseSSE >= 4, "required"); 7621 7622 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7623 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7624 } 7625 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7626 %} 7627 ins_pipe( pipe_slow ); 7628 %} 7629 7630 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7631 predicate(UseAVX > 0 && 7632 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7633 Matcher::vector_length_in_bytes(n) <= 32 && 7634 is_integral_type(Matcher::vector_element_basic_type(n))); 7635 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7636 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7637 ins_encode %{ 7638 int vlen_enc = vector_length_encoding(this); 7639 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7640 %} 7641 ins_pipe( pipe_slow ); 7642 %} 7643 7644 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7645 predicate(UseAVX > 0 && 7646 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7647 Matcher::vector_length_in_bytes(n) <= 32 && 7648 !is_integral_type(Matcher::vector_element_basic_type(n))); 7649 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7650 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7651 ins_encode %{ 7652 int vlen_enc = vector_length_encoding(this); 7653 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7654 %} 7655 ins_pipe( pipe_slow ); 7656 %} 7657 7658 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ 7659 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7660 n->in(2)->bottom_type()->isa_vectmask() == NULL); 7661 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7662 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 7663 effect(TEMP scratch, TEMP ktmp); 7664 ins_encode %{ 7665 int vlen_enc = Assembler::AVX_512bit; 7666 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7667 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 7668 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7669 %} 7670 ins_pipe( pipe_slow ); 7671 %} 7672 7673 7674 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{ 7675 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7676 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7677 VM_Version::supports_avx512bw())); 7678 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7679 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $scratch and k2 as TEMP" %} 7680 effect(TEMP scratch); 7681 ins_encode %{ 7682 int vlen_enc = vector_length_encoding(this); 7683 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7684 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7685 %} 7686 ins_pipe( pipe_slow ); 7687 %} 7688 7689 // --------------------------------- ABS -------------------------------------- 7690 // a = |a| 7691 instruct vabsB_reg(vec dst, vec src) %{ 7692 match(Set dst (AbsVB src)); 7693 ins_cost(450); 7694 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7695 ins_encode %{ 7696 uint vlen = Matcher::vector_length(this); 7697 if (vlen <= 16) { 7698 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7699 } else { 7700 int vlen_enc = vector_length_encoding(this); 7701 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7702 } 7703 %} 7704 ins_pipe( pipe_slow ); 7705 %} 7706 7707 instruct vabsS_reg(vec dst, vec src) %{ 7708 match(Set dst (AbsVS src)); 7709 ins_cost(450); 7710 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7711 ins_encode %{ 7712 uint vlen = Matcher::vector_length(this); 7713 if (vlen <= 8) { 7714 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7715 } else { 7716 int vlen_enc = vector_length_encoding(this); 7717 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7718 } 7719 %} 7720 ins_pipe( pipe_slow ); 7721 %} 7722 7723 instruct vabsI_reg(vec dst, vec src) %{ 7724 match(Set dst (AbsVI src)); 7725 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7726 ins_cost(250); 7727 ins_encode %{ 7728 uint vlen = Matcher::vector_length(this); 7729 if (vlen <= 4) { 7730 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7731 } else { 7732 int vlen_enc = vector_length_encoding(this); 7733 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7734 } 7735 %} 7736 ins_pipe( pipe_slow ); 7737 %} 7738 7739 instruct vabsL_reg(vec dst, vec src) %{ 7740 match(Set dst (AbsVL src)); 7741 ins_cost(450); 7742 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7743 ins_encode %{ 7744 assert(UseAVX > 2, "required"); 7745 int vlen_enc = vector_length_encoding(this); 7746 if (!VM_Version::supports_avx512vl()) { 7747 vlen_enc = Assembler::AVX_512bit; 7748 } 7749 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7750 %} 7751 ins_pipe( pipe_slow ); 7752 %} 7753 7754 // --------------------------------- ABSNEG -------------------------------------- 7755 7756 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7757 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7758 match(Set dst (AbsVF src)); 7759 match(Set dst (NegVF src)); 7760 effect(TEMP scratch); 7761 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7762 ins_cost(150); 7763 ins_encode %{ 7764 int opcode = this->ideal_Opcode(); 7765 int vlen = Matcher::vector_length(this); 7766 if (vlen == 2) { 7767 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7768 } else { 7769 assert(vlen == 8 || vlen == 16, "required"); 7770 int vlen_enc = vector_length_encoding(this); 7771 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7772 } 7773 %} 7774 ins_pipe( pipe_slow ); 7775 %} 7776 7777 instruct vabsneg4F(vec dst, rRegI scratch) %{ 7778 predicate(Matcher::vector_length(n) == 4); 7779 match(Set dst (AbsVF dst)); 7780 match(Set dst (NegVF dst)); 7781 effect(TEMP scratch); 7782 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" 
%} 7783 ins_cost(150); 7784 ins_encode %{ 7785 int opcode = this->ideal_Opcode(); 7786 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7787 %} 7788 ins_pipe( pipe_slow ); 7789 %} 7790 7791 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7792 match(Set dst (AbsVD src)); 7793 match(Set dst (NegVD src)); 7794 effect(TEMP scratch); 7795 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7796 ins_encode %{ 7797 int opcode = this->ideal_Opcode(); 7798 uint vlen = Matcher::vector_length(this); 7799 if (vlen == 2) { 7800 assert(UseSSE >= 2, "required"); 7801 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7802 } else { 7803 int vlen_enc = vector_length_encoding(this); 7804 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7805 } 7806 %} 7807 ins_pipe( pipe_slow ); 7808 %} 7809 7810 //------------------------------------- VectorTest -------------------------------------------- 7811 7812 #ifdef _LP64 7813 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ 7814 predicate(!VM_Version::supports_avx512bwdq() && 7815 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7816 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7817 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7818 match(Set dst (VectorTest src1 src2 )); 7819 effect(TEMP vtmp1, TEMP vtmp2, KILL cr); 7820 format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %} 7821 ins_encode %{ 7822 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7823 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 7824 __ setb(Assembler::carrySet, $dst$$Register); 7825 __ movzbl($dst$$Register, $dst$$Register); 7826 %} 7827 ins_pipe( pipe_slow ); 7828 %} 7829 7830 instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7831 predicate(!VM_Version::supports_avx512bwdq() && 7832 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7833 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7834 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7835 match(Set dst (VectorTest src1 src2 )); 7836 effect(KILL cr); 7837 format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %} 7838 ins_encode %{ 7839 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7840 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7841 __ setb(Assembler::carrySet, $dst$$Register); 7842 __ movzbl($dst$$Register, $dst$$Register); 7843 %} 7844 ins_pipe( pipe_slow ); 7845 %} 7846 7847 instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{ 7848 predicate(VM_Version::supports_avx512bwdq() && 7849 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7850 n->in(1)->bottom_type()->isa_vectmask() && 7851 Matcher::vector_length(n->in(1)) < 8); 7852 match(Set dst (VectorTest src1 src2)); 7853 effect(KILL cr, TEMP kscratch); 7854 format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! 
using $cr as TEMP" %} 7855 ins_encode %{ 7856 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7857 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7858 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7859 uint masklen = Matcher::vector_length(this, $src1); 7860 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister); 7861 %} 7862 ins_pipe( pipe_slow ); 7863 %} 7864 7865 7866 instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 7867 predicate(VM_Version::supports_avx512bwdq() && 7868 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7869 n->in(1)->bottom_type()->isa_vectmask() && 7870 Matcher::vector_length(n->in(1)) >= 8); 7871 match(Set dst (VectorTest src1 src2)); 7872 effect(KILL cr); 7873 format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %} 7874 ins_encode %{ 7875 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7876 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7877 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7878 uint masklen = Matcher::vector_length(this, $src1); 7879 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg); 7880 %} 7881 ins_pipe( pipe_slow ); 7882 %} 7883 7884 7885 instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ 7886 predicate(!VM_Version::supports_avx512bwdq() && 7887 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7888 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7889 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7890 match(Set dst (VectorTest src1 src2 )); 7891 effect(TEMP vtmp, KILL cr); 7892 format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} 7893 ins_encode %{ 7894 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7895 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7896 __ setb(Assembler::notZero, $dst$$Register); 7897 __ movzbl($dst$$Register, $dst$$Register); 7898 %} 7899 ins_pipe( pipe_slow ); 7900 %} 7901 7902 instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7903 predicate(!VM_Version::supports_avx512bwdq() && 7904 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7905 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7906 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7907 match(Set dst (VectorTest src1 src2 )); 7908 effect(KILL cr); 7909 format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %} 7910 ins_encode %{ 7911 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7912 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7913 __ setb(Assembler::notZero, $dst$$Register); 7914 __ movzbl($dst$$Register, $dst$$Register); 7915 %} 7916 ins_pipe( pipe_slow ); 7917 %} 7918 7919 instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 7920 predicate(VM_Version::supports_avx512bwdq() && 7921 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7922 match(Set dst (VectorTest src1 src2)); 7923 effect(KILL cr); 7924 format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t! 
using $cr as TEMP" %} 7925 ins_encode %{ 7926 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7927 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7928 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7929 uint masklen = Matcher::vector_length(this, $src1); 7930 __ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister); 7931 %} 7932 ins_pipe( pipe_slow ); 7933 %} 7934 7935 instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ 7936 predicate(!VM_Version::supports_avx512bwdq() && 7937 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && 7938 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 && 7939 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7940 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7941 effect(TEMP vtmp); 7942 format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %} 7943 ins_encode %{ 7944 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7945 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7946 %} 7947 ins_pipe( pipe_slow ); 7948 %} 7949 7950 instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ 7951 predicate(!VM_Version::supports_avx512bwdq() && 7952 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && 7953 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 && 7954 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7955 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7956 format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %} 7957 ins_encode %{ 7958 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7959 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7960 %} 7961 ins_pipe( pipe_slow ); 7962 %} 7963 7964 instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{ 7965 predicate(VM_Version::supports_avx512bwdq() && 7966 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7967 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7968 format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %} 7969 ins_encode %{ 7970 uint masklen = Matcher::vector_length(this, $src1); 7971 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7972 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7973 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7974 masklen = masklen < 8 ? 
8 : masklen; 7975 __ ktest(masklen, $src1$$KRegister, $src2$$KRegister); 7976 %} 7977 ins_pipe( pipe_slow ); 7978 %} 7979 #endif 7980 7981 //------------------------------------- LoadMask -------------------------------------------- 7982 7983 instruct loadMask(legVec dst, legVec src) %{ 7984 predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw()); 7985 match(Set dst (VectorLoadMask src)); 7986 effect(TEMP dst); 7987 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 7988 ins_encode %{ 7989 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 7990 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7991 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 7992 %} 7993 ins_pipe( pipe_slow ); 7994 %} 7995 7996 instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{ 7997 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 7998 match(Set dst (VectorLoadMask src)); 7999 effect(TEMP xtmp, TEMP tmp); 8000 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %} 8001 ins_encode %{ 8002 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8003 $tmp$$Register, true, Assembler::AVX_512bit); 8004 %} 8005 ins_pipe( pipe_slow ); 8006 %} 8007 8008 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8009 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8010 match(Set dst (VectorLoadMask src)); 8011 effect(TEMP xtmp); 8012 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8013 ins_encode %{ 8014 int vlen_enc = vector_length_encoding(in(1)); 8015 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8016 noreg, false, vlen_enc); 8017 %} 8018 ins_pipe( pipe_slow ); 8019 %} 8020 8021 //------------------------------------- StoreMask -------------------------------------------- 8022 8023 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8024 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8025 match(Set dst (VectorStoreMask src size)); 8026 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8027 ins_encode %{ 8028 int vlen = Matcher::vector_length(this); 8029 if (vlen <= 16 && UseAVX <= 2) { 8030 assert(UseSSE >= 3, "required"); 8031 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8032 } else { 8033 assert(UseAVX > 0, "required"); 8034 int src_vlen_enc = vector_length_encoding(this, $src); 8035 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8036 } 8037 %} 8038 ins_pipe( pipe_slow ); 8039 %} 8040 8041 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8042 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8043 match(Set dst (VectorStoreMask src size)); 8044 effect(TEMP_DEF dst, TEMP xtmp); 8045 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s]" %} 8046 ins_encode %{ 8047 int vlen_enc = Assembler::AVX_128bit; 8048 int vlen = Matcher::vector_length(this); 8049 if (vlen <= 8) { 8050 assert(UseSSE >= 3, "required"); 8051 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8052 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8053 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8054 } else { 8055 assert(UseAVX > 0, "required"); 8056 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8057 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8058 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8059 } 8060 %} 8061 ins_pipe( pipe_slow ); 8062 %} 8063 8064 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8065 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8066 match(Set dst (VectorStoreMask src size)); 8067 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8068 effect(TEMP_DEF dst, TEMP xtmp); 8069 ins_encode %{ 8070 int vlen_enc = Assembler::AVX_128bit; 8071 int vlen = Matcher::vector_length(this); 8072 if (vlen <= 4) { 8073 assert(UseSSE >= 3, "required"); 8074 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8075 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8076 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8077 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8078 } else { 8079 assert(UseAVX > 0, "required"); 8080 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8081 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8082 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8083 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8084 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8085 } 8086 %} 8087 ins_pipe( pipe_slow ); 8088 %} 8089 8090 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8091 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8092 match(Set dst (VectorStoreMask src size)); 8093 effect(TEMP_DEF dst, TEMP xtmp); 8094 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8095 ins_encode %{ 8096 assert(UseSSE >= 3, "required"); 8097 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8098 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8099 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8100 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8101 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8102 %} 8103 ins_pipe( pipe_slow ); 8104 %} 8105 8106 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8107 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8108 match(Set dst (VectorStoreMask src size)); 8109 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} 8110 effect(TEMP_DEF dst, TEMP vtmp); 8111 ins_encode %{ 8112 int vlen_enc = Assembler::AVX_128bit; 8113 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8114 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8115 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8116 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8117 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8118 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8119 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8120 %} 8121 ins_pipe( pipe_slow ); 8122 %} 8123 8124 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8125 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8126 match(Set dst (VectorStoreMask src size)); 8127 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8128 ins_encode %{ 8129 int src_vlen_enc = vector_length_encoding(this, $src); 8130 int dst_vlen_enc = vector_length_encoding(this); 8131 if (!VM_Version::supports_avx512vl()) { 8132 src_vlen_enc = Assembler::AVX_512bit; 8133 } 8134 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8135 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8136 %} 8137 ins_pipe( pipe_slow ); 8138 %} 8139 8140 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8141 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8142 match(Set dst (VectorStoreMask src size)); 8143 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8144 ins_encode %{ 8145 int src_vlen_enc = vector_length_encoding(this, $src); 8146 int dst_vlen_enc = vector_length_encoding(this); 8147 if (!VM_Version::supports_avx512vl()) { 8148 src_vlen_enc = Assembler::AVX_512bit; 8149 } 8150 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8151 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8152 %} 8153 ins_pipe( pipe_slow ); 8154 %} 8155 8156 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{ 8157 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8158 match(Set dst (VectorStoreMask mask size)); 8159 effect(TEMP_DEF dst, TEMP tmp); 8160 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8161 ins_encode %{ 8162 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8163 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8164 false, Assembler::AVX_512bit, $tmp$$Register); 8165 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8166 %} 8167 ins_pipe( pipe_slow ); 8168 %} 8169 8170 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8171 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8172 match(Set dst (VectorStoreMask mask size)); 8173 effect(TEMP_DEF dst); 8174 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8175 ins_encode %{ 8176 int dst_vlen_enc = vector_length_encoding(this); 8177 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8178 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8179 %} 8180 ins_pipe( pipe_slow ); 8181 %} 8182 8183 instruct vmaskcast_evex(kReg dst) %{ 8184 predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); 8185 match(Set dst (VectorMaskCast dst)); 8186 ins_cost(0); 8187 format %{ "vector_mask_cast $dst" %} 8188 ins_encode %{ 8189 // empty 8190 %} 8191 ins_pipe(empty); 8192 %} 8193 8194 instruct vmaskcast(vec dst) %{ 8195 predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) && 8196 (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)))); 8197 match(Set dst (VectorMaskCast dst)); 8198 ins_cost(0); 8199 format %{ "vector_mask_cast $dst" %} 8200 ins_encode %{ 8201 // empty 8202 %} 8203 ins_pipe(empty); 8204 %} 8205 8206 //-------------------------------- Load Iota Indices ---------------------------------- 8207 8208 instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ 8209 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8210 match(Set dst (VectorLoadConst src)); 8211 effect(TEMP scratch); 8212 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8213 ins_encode %{ 8214 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8215 __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); 8216 %} 8217 ins_pipe( pipe_slow ); 8218 %} 8219 8220 //-------------------------------- Rearrange ---------------------------------- 8221 8222 // LoadShuffle/Rearrange for Byte 8223 8224 instruct loadShuffleB(vec dst) %{ 8225 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8226 match(Set dst (VectorLoadShuffle dst)); 8227 format %{ "vector_load_shuffle $dst, $dst" %} 8228 ins_encode %{ 8229 // empty 8230 %} 8231 ins_pipe( pipe_slow ); 8232 %} 8233 8234 instruct rearrangeB(vec dst, vec shuffle) %{ 8235 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8236 Matcher::vector_length(n) < 32); 8237 match(Set dst (VectorRearrange dst shuffle)); 8238 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8239 ins_encode %{ 8240 assert(UseSSE >= 4, "required"); 8241 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8242 %} 8243 ins_pipe( pipe_slow ); 8244 %} 8245 8246 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8247 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8248 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8249 match(Set dst (VectorRearrange src shuffle)); 8250 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8251 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2, $scratch as TEMP" %} 8252 ins_encode %{ 8253 assert(UseAVX >= 2, "required"); 8254 // Swap src into vtmp1 8255 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8256 // Shuffle swapped src to get entries from other 128 bit lane 8257 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8258 // Shuffle original src to get entries from self 128 bit lane 8259 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8260 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8261 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); 8262 // Perform the blend 8263 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8264 %} 8265 ins_pipe( pipe_slow ); 8266 %} 8267 8268 instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{ 8269 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8270 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8271 match(Set dst (VectorRearrange src shuffle)); 8272 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8273 ins_encode %{ 8274 int vlen_enc = vector_length_encoding(this); 8275 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8276 %} 8277 ins_pipe( pipe_slow ); 8278 %} 8279 8280 // LoadShuffle/Rearrange for Short 8281 8282 instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8283 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8284 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8285 match(Set dst (VectorLoadShuffle src)); 8286 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8287 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} 8288 ins_encode %{ 8289 // Create a byte shuffle mask from short shuffle mask 8290 // only byte shuffle instruction available on these platforms 8291 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8292 if (UseAVX == 0) { 8293 assert(vlen_in_bytes <= 16, "required"); 8294 // Multiply each shuffle by two to get byte index 8295 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8296 __ psllw($vtmp$$XMMRegister, 1); 8297 8298 // Duplicate to create 2 copies of byte index 8299 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8300 __ psllw($dst$$XMMRegister, 8); 8301 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8302 8303 // Add one to get alternate byte index 8304 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); 8305 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8306 } else { 8307 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8308 int vlen_enc = vector_length_encoding(this); 8309 // Multiply each shuffle by two to get byte index 8310 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8311 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8312 8313 // Duplicate to create 2 copies of byte index 8314 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8315 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8316 8317 // Add one to get alternate byte index 8318 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register); 8319 } 8320 %} 8321 ins_pipe( pipe_slow ); 8322 %} 8323 8324 instruct rearrangeS(vec dst, vec shuffle) %{ 8325 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8326 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8327 match(Set dst (VectorRearrange dst shuffle)); 8328 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8329 ins_encode %{ 8330 assert(UseSSE >= 4, "required"); 8331 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8332 %} 8333 ins_pipe( pipe_slow ); 8334 %} 8335 8336 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8337 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8338 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8339 match(Set dst (VectorRearrange src shuffle)); 8340 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8341 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2, $scratch as TEMP" %} 8342 ins_encode %{ 8343 assert(UseAVX >= 2, "required"); 8344 // Swap src into vtmp1 8345 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8346 // Shuffle swapped src to get entries from other 128 bit lane 8347 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8348 // Shuffle original src to get entries from self 128 bit lane 8349 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8350 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8351 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); 8352 // Perform the blend 8353 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8354 %} 8355 ins_pipe( pipe_slow ); 8356 %} 8357 8358 instruct loadShuffleS_evex(vec dst, vec src) %{ 8359 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8360 VM_Version::supports_avx512bw()); 8361 match(Set dst (VectorLoadShuffle src)); 8362 format %{ "vector_load_shuffle $dst, $src" %} 8363 ins_encode %{ 8364 int vlen_enc = vector_length_encoding(this); 8365 if (!VM_Version::supports_avx512vl()) { 8366 vlen_enc = Assembler::AVX_512bit; 8367 } 8368 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8369 %} 8370 ins_pipe( pipe_slow ); 8371 %} 8372 8373 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8374 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8375 VM_Version::supports_avx512bw()); 8376 match(Set dst (VectorRearrange src shuffle)); 8377 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8378 ins_encode %{ 8379 int vlen_enc = vector_length_encoding(this); 8380 if (!VM_Version::supports_avx512vl()) { 8381 vlen_enc = Assembler::AVX_512bit; 8382 } 8383 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8384 %} 8385 ins_pipe( pipe_slow ); 8386 %} 8387 8388 // LoadShuffle/Rearrange for Integer and Float 8389 8390 instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8391 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8392 Matcher::vector_length(n) == 4 && UseAVX < 2); 8393 match(Set dst (VectorLoadShuffle src)); 8394 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8395 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} 8396 ins_encode %{ 8397 assert(UseSSE >= 4, "required"); 8398 8399 // Create a byte shuffle mask from int shuffle mask 8400 // only byte shuffle instruction available on these platforms 8401 8402 // Duplicate and multiply each shuffle by 4 8403 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8404 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8405 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8406 __ psllw($vtmp$$XMMRegister, 2); 8407 8408 // Duplicate again to create 4 copies of byte index 8409 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8410 __ psllw($dst$$XMMRegister, 8); 8411 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8412 8413 // Add 3,2,1,0 to get alternate byte index 8414 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); 8415 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8416 %} 8417 ins_pipe( pipe_slow ); 8418 %} 8419 8420 instruct rearrangeI(vec dst, vec shuffle) %{ 8421 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8422 Matcher::vector_length(n) == 4 && UseAVX < 2); 8423 match(Set dst (VectorRearrange dst shuffle)); 8424 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8425 ins_encode %{ 8426 assert(UseSSE >= 4, "required"); 8427 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8428 %} 8429 ins_pipe( pipe_slow ); 8430 %} 8431 8432 instruct loadShuffleI_avx(vec dst, vec src) %{ 8433 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8434 UseAVX >= 2); 8435 match(Set dst (VectorLoadShuffle src)); 8436 format %{ "vector_load_shuffle $dst, $src" %} 8437 ins_encode %{ 8438 int vlen_enc = vector_length_encoding(this); 8439 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8440 %} 8441 ins_pipe( pipe_slow ); 8442 %} 8443 8444 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8445 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8446 UseAVX >= 2); 8447 match(Set dst (VectorRearrange src shuffle)); 8448 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8449 ins_encode %{ 8450 int vlen_enc = vector_length_encoding(this); 8451 if (vlen_enc == Assembler::AVX_128bit) { 8452 vlen_enc = Assembler::AVX_256bit; 8453 } 8454 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8455 %} 8456 ins_pipe( pipe_slow ); 8457 %} 8458 8459 // LoadShuffle/Rearrange for Long and Double 8460 8461 instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8462 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8463 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8464 match(Set dst (VectorLoadShuffle src)); 8465 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8466 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp and $scratch as TEMP" %} 8467 ins_encode %{ 8468 assert(UseAVX >= 2, "required"); 8469 8470 int vlen_enc = vector_length_encoding(this); 8471 // Create a double word shuffle mask from long shuffle mask 8472 // only double word shuffle instruction available on these platforms 8473 8474 // Multiply each shuffle by two to get double word index 8475 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8476 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8477 8478 // Duplicate each double word shuffle 8479 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8480 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8481 8482 // Add one to get alternate double word index 8483 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); 8484 %} 8485 ins_pipe( pipe_slow ); 8486 %} 8487 8488 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8489 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8490 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8491 match(Set dst (VectorRearrange src shuffle)); 8492 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8493 ins_encode %{ 8494 assert(UseAVX >= 2, "required"); 8495 8496 int vlen_enc = vector_length_encoding(this); 8497 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8498 %} 8499 ins_pipe( pipe_slow ); 8500 %} 8501 8502 instruct loadShuffleL_evex(vec dst, vec src) %{ 8503 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8504 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8505 match(Set dst (VectorLoadShuffle src)); 8506 format %{ "vector_load_shuffle $dst, $src" %} 8507 ins_encode %{ 8508 assert(UseAVX > 2, "required"); 8509 8510 int vlen_enc = vector_length_encoding(this); 8511 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8512 %} 8513 ins_pipe( pipe_slow ); 8514 %} 8515 8516 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8517 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8518 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8519 match(Set dst (VectorRearrange src shuffle)); 8520 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8521 ins_encode %{ 8522 assert(UseAVX > 2, "required"); 8523 8524 int vlen_enc = vector_length_encoding(this); 8525 if (vlen_enc == Assembler::AVX_128bit) { 8526 vlen_enc = Assembler::AVX_256bit; 8527 } 8528 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8529 %} 8530 ins_pipe( pipe_slow ); 8531 %} 8532 8533 // --------------------------------- FMA -------------------------------------- 8534 // a * b + c 8535 8536 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8537 match(Set c (FmaVF c (Binary a b))); 8538 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8539 ins_cost(150); 8540 ins_encode %{ 8541 assert(UseFMA, "not enabled"); 8542 int vlen_enc = vector_length_encoding(this); 8543 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8544 %} 8545 ins_pipe( pipe_slow ); 8546 %} 8547 8548 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8549 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8550 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8551 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8552 ins_cost(150); 8553 ins_encode %{ 
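    // Note: the LoadVector of $b is folded directly into the FMA as a memory operand
    // (match rule above: Set c (FmaVF c (Binary a (LoadVector b)))), so no separate
    // vector load is emitted; $c is both the accumulator input and the destination
    // ($c = $a * $b + $c). The predicate restricts this memory form to vectors wider
    // than 8 bytes, presumably so the folded full-width load is never used for the
    // 8-byte (2-float) case.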
8554 assert(UseFMA, "not enabled"); 8555 int vlen_enc = vector_length_encoding(this); 8556 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8557 %} 8558 ins_pipe( pipe_slow ); 8559 %} 8560 8561 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8562 match(Set c (FmaVD c (Binary a b))); 8563 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8564 ins_cost(150); 8565 ins_encode %{ 8566 assert(UseFMA, "not enabled"); 8567 int vlen_enc = vector_length_encoding(this); 8568 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8569 %} 8570 ins_pipe( pipe_slow ); 8571 %} 8572 8573 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8574 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8575 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8576 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8577 ins_cost(150); 8578 ins_encode %{ 8579 assert(UseFMA, "not enabled"); 8580 int vlen_enc = vector_length_encoding(this); 8581 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8582 %} 8583 ins_pipe( pipe_slow ); 8584 %} 8585 8586 // --------------------------------- Vector Multiply Add -------------------------------------- 8587 8588 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8589 predicate(UseAVX == 0); 8590 match(Set dst (MulAddVS2VI dst src1)); 8591 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8592 ins_encode %{ 8593 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8594 %} 8595 ins_pipe( pipe_slow ); 8596 %} 8597 8598 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8599 predicate(UseAVX > 0); 8600 match(Set dst (MulAddVS2VI src1 src2)); 8601 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8602 ins_encode %{ 8603 int vlen_enc = vector_length_encoding(this); 8604 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8605 %} 8606 ins_pipe( pipe_slow ); 8607 %} 8608 8609 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8610 8611 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8612 predicate(VM_Version::supports_avx512_vnni()); 8613 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8614 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 8615 ins_encode %{ 8616 assert(UseAVX > 2, "required"); 8617 int vlen_enc = vector_length_encoding(this); 8618 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8619 %} 8620 ins_pipe( pipe_slow ); 8621 ins_cost(10); 8622 %} 8623 8624 // --------------------------------- PopCount -------------------------------------- 8625 8626 instruct vpopcountI_popcntd(vec dst, vec src) %{ 8627 predicate(VM_Version::supports_avx512_vpopcntdq()); 8628 match(Set dst (PopCountVI src)); 8629 format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %} 8630 ins_encode %{ 8631 assert(UsePopCountInstruction, "not enabled"); 8632 int vlen_enc = vector_length_encoding(this); 8633 __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc); 8634 %} 8635 ins_pipe( pipe_slow ); 8636 %} 8637 8638 instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{ 8639 predicate(!VM_Version::supports_avx512_vpopcntdq()); 8640 match(Set dst (PopCountVI src)); 8641 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc); 8642 format %{ "vector_popcount_int $dst, $src\t! 
using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8643 ins_encode %{ 8644 assert(UsePopCountInstruction, "not enabled"); 8645 int vlen_enc = vector_length_encoding(this); 8646 __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 8647 $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8648 %} 8649 ins_pipe( pipe_slow ); 8650 %} 8651 8652 instruct vpopcountL_popcntd(vec dst, vec src) %{ 8653 predicate(VM_Version::supports_avx512_vpopcntdq()); 8654 match(Set dst (PopCountVL src)); 8655 format %{ "vector_popcount_long $dst, $src\t! vector popcount packedL" %} 8656 ins_encode %{ 8657 assert(UsePopCountInstruction, "not enabled"); 8658 int vlen_enc = vector_length_encoding(this, $src); 8659 __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc); 8660 %} 8661 ins_pipe( pipe_slow ); 8662 %} 8663 8664 instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{ 8665 predicate(!VM_Version::supports_avx512_vpopcntdq()); 8666 match(Set dst (PopCountVL src)); 8667 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc); 8668 format %{ "vector_popcount_long $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8669 ins_encode %{ 8670 assert(UsePopCountInstruction, "not enabled"); 8671 int vlen_enc = vector_length_encoding(this, $src); 8672 __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 8673 $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8674 %} 8675 ins_pipe( pipe_slow ); 8676 %} 8677 8678 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 8679 8680 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 8681 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 8682 effect(TEMP dst); 8683 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8684 ins_encode %{ 8685 int vector_len = vector_length_encoding(this); 8686 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8687 %} 8688 ins_pipe( pipe_slow ); 8689 %} 8690 8691 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8692 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 8693 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8694 effect(TEMP dst); 8695 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8696 ins_encode %{ 8697 int vector_len = vector_length_encoding(this); 8698 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8699 %} 8700 ins_pipe( pipe_slow ); 8701 %} 8702 8703 // --------------------------------- Rotation Operations ---------------------------------- 8704 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8705 match(Set dst (RotateLeftV src shift)); 8706 match(Set dst (RotateRightV src shift)); 8707 format %{ "vprotate_imm8 $dst,$src,$shift\t! 
vector rotate" %} 8708 ins_encode %{ 8709 int opcode = this->ideal_Opcode(); 8710 int vector_len = vector_length_encoding(this); 8711 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8712 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 8713 %} 8714 ins_pipe( pipe_slow ); 8715 %} 8716 8717 instruct vprorate(vec dst, vec src, vec shift) %{ 8718 match(Set dst (RotateLeftV src shift)); 8719 match(Set dst (RotateRightV src shift)); 8720 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 8721 ins_encode %{ 8722 int opcode = this->ideal_Opcode(); 8723 int vector_len = vector_length_encoding(this); 8724 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8725 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8726 %} 8727 ins_pipe( pipe_slow ); 8728 %} 8729 8730 #ifdef _LP64 8731 // ---------------------------------- Masked Operations ------------------------------------ 8732 8733 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8734 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 8735 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 8736 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %} 8737 ins_encode %{ 8738 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 8739 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 8740 8741 Label DONE; 8742 int vlen_enc = vector_length_encoding(this, $src1); 8743 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 8744 8745 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 8746 __ mov64($dst$$Register, -1L); 8747 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 8748 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 8749 __ jccb(Assembler::carrySet, DONE); 8750 __ kmovql($dst$$Register, $ktmp1$$KRegister); 8751 __ notq($dst$$Register); 8752 __ tzcntq($dst$$Register, $dst$$Register); 8753 __ bind(DONE); 8754 %} 8755 ins_pipe( pipe_slow ); 8756 %} 8757 8758 8759 instruct vmasked_load64(vec dst, memory mem, kReg mask) %{ 8760 match(Set dst (LoadVectorMasked mem mask)); 8761 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8762 ins_encode %{ 8763 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8764 int vector_len = vector_length_encoding(this); 8765 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len); 8766 %} 8767 ins_pipe( pipe_slow ); 8768 %} 8769 8770 instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 8771 match(Set dst (VectorMaskGen len)); 8772 effect(TEMP temp); 8773 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 8774 ins_encode %{ 8775 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 8776 %} 8777 ins_pipe( pipe_slow ); 8778 %} 8779 8780 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 8781 match(Set dst (VectorMaskGen len)); 8782 format %{ "vector_mask_gen $len \t! 
vector mask generator" %} 8783 effect(TEMP temp); 8784 ins_encode %{ 8785 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 8786 __ kmovql($dst$$KRegister, $temp$$Register); 8787 %} 8788 ins_pipe( pipe_slow ); 8789 %} 8790 8791 instruct vmasked_store64(memory mem, vec src, kReg mask) %{ 8792 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8793 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8794 ins_encode %{ 8795 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8796 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8797 int vector_len = vector_length_encoding(src_node); 8798 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len); 8799 %} 8800 ins_pipe( pipe_slow ); 8801 %} 8802 8803 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 8804 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8805 match(Set dst (VectorMaskToLong mask)); 8806 effect(TEMP dst, KILL cr); 8807 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 8808 ins_encode %{ 8809 int opcode = this->ideal_Opcode(); 8810 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8811 int mask_len = Matcher::vector_length(this, $mask); 8812 int mask_size = mask_len * type2aelembytes(mbt); 8813 int vlen_enc = vector_length_encoding(this, $mask); 8814 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8815 $dst$$Register, mask_len, mask_size, vlen_enc); 8816 %} 8817 ins_pipe( pipe_slow ); 8818 %} 8819 8820 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 8821 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8822 match(Set dst (VectorMaskToLong mask)); 8823 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 8824 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 8825 ins_encode %{ 8826 int opcode = this->ideal_Opcode(); 8827 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8828 int mask_len = Matcher::vector_length(this, $mask); 8829 int vlen_enc = vector_length_encoding(this, $mask); 8830 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8831 $dst$$Register, mask_len, mbt, vlen_enc); 8832 %} 8833 ins_pipe( pipe_slow ); 8834 %} 8835 8836 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 8837 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8838 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 8839 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %} 8840 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 8841 ins_encode %{ 8842 int opcode = this->ideal_Opcode(); 8843 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8844 int mask_len = Matcher::vector_length(this, $mask); 8845 int vlen_enc = vector_length_encoding(this, $mask); 8846 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8847 $dst$$Register, mask_len, mbt, vlen_enc); 8848 %} 8849 ins_pipe( pipe_slow ); 8850 %} 8851 8852 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 8853 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8854 match(Set dst (VectorMaskTrueCount mask)); 8855 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 8856 format %{ "vector_truecount_evex $dst, $mask \t! 
using $tmp as TEMP" %} 8857 ins_encode %{ 8858 int opcode = this->ideal_Opcode(); 8859 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8860 int mask_len = Matcher::vector_length(this, $mask); 8861 int mask_size = mask_len * type2aelembytes(mbt); 8862 int vlen_enc = vector_length_encoding(this, $mask); 8863 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8864 $tmp$$Register, mask_len, mask_size, vlen_enc); 8865 %} 8866 ins_pipe( pipe_slow ); 8867 %} 8868 8869 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8870 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8871 match(Set dst (VectorMaskTrueCount mask)); 8872 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8873 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8874 ins_encode %{ 8875 int opcode = this->ideal_Opcode(); 8876 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8877 int mask_len = Matcher::vector_length(this, $mask); 8878 int vlen_enc = vector_length_encoding(this, $mask); 8879 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8880 $tmp$$Register, mask_len, mbt, vlen_enc); 8881 %} 8882 ins_pipe( pipe_slow ); 8883 %} 8884 8885 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8886 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8887 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 8888 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8889 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8890 ins_encode %{ 8891 int opcode = this->ideal_Opcode(); 8892 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8893 int mask_len = Matcher::vector_length(this, $mask); 8894 int vlen_enc = vector_length_encoding(this, $mask); 8895 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8896 $tmp$$Register, mask_len, mbt, vlen_enc); 8897 %} 8898 ins_pipe( pipe_slow ); 8899 %} 8900 8901 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 8902 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8903 match(Set dst (VectorMaskFirstTrue mask)); 8904 match(Set dst (VectorMaskLastTrue mask)); 8905 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 8906 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %} 8907 ins_encode %{ 8908 int opcode = this->ideal_Opcode(); 8909 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8910 int mask_len = Matcher::vector_length(this, $mask); 8911 int mask_size = mask_len * type2aelembytes(mbt); 8912 int vlen_enc = vector_length_encoding(this, $mask); 8913 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8914 $tmp$$Register, mask_len, mask_size, vlen_enc); 8915 %} 8916 ins_pipe( pipe_slow ); 8917 %} 8918 8919 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8920 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8921 match(Set dst (VectorMaskFirstTrue mask)); 8922 match(Set dst (VectorMaskLastTrue mask)); 8923 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8924 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 8925 ins_encode %{ 8926 int opcode = this->ideal_Opcode(); 8927 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8928 int mask_len = Matcher::vector_length(this, $mask); 8929 int vlen_enc = vector_length_encoding(this, $mask); 8930 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8931 $tmp$$Register, mask_len, mbt, vlen_enc); 8932 %} 8933 ins_pipe( pipe_slow ); 8934 %} 8935 8936 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8937 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8938 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 8939 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 8940 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8941 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8942 ins_encode %{ 8943 int opcode = this->ideal_Opcode(); 8944 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8945 int mask_len = Matcher::vector_length(this, $mask); 8946 int vlen_enc = vector_length_encoding(this, $mask); 8947 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8948 $tmp$$Register, mask_len, mbt, vlen_enc); 8949 %} 8950 ins_pipe( pipe_slow ); 8951 %} 8952 #endif // _LP64 8953 8954 // ---------------------------------- Vector Masked Operations ------------------------------------ 8955 8956 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 8957 match(Set dst (AddVB (Binary dst src2) mask)); 8958 match(Set dst (AddVS (Binary dst src2) mask)); 8959 match(Set dst (AddVI (Binary dst src2) mask)); 8960 match(Set dst (AddVL (Binary dst src2) mask)); 8961 match(Set dst (AddVF (Binary dst src2) mask)); 8962 match(Set dst (AddVD (Binary dst src2) mask)); 8963 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 8964 ins_encode %{ 8965 int vlen_enc = vector_length_encoding(this); 8966 BasicType bt = Matcher::vector_element_basic_type(this); 8967 int opc = this->ideal_Opcode(); 8968 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 8969 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8970 %} 8971 ins_pipe( pipe_slow ); 8972 %} 8973 8974 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 8975 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 8976 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 8977 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 8978 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 8979 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 8980 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 8981 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 8982 ins_encode %{ 8983 int vlen_enc = vector_length_encoding(this); 8984 BasicType bt = Matcher::vector_element_basic_type(this); 8985 int opc = this->ideal_Opcode(); 8986 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 8987 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 8988 %} 8989 ins_pipe( pipe_slow ); 8990 %} 8991 8992 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 8993 match(Set dst (XorV (Binary dst src2) mask)); 8994 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 8995 ins_encode %{ 8996 int vlen_enc = vector_length_encoding(this); 8997 BasicType bt = Matcher::vector_element_basic_type(this); 8998 int opc = this->ideal_Opcode(); 8999 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9000 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9001 %} 9002 ins_pipe( pipe_slow ); 9003 %} 9004 9005 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9006 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9007 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9008 ins_encode %{ 9009 int vlen_enc = vector_length_encoding(this); 9010 BasicType bt = Matcher::vector_element_basic_type(this); 9011 int opc = this->ideal_Opcode(); 9012 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9013 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9014 %} 9015 ins_pipe( pipe_slow ); 9016 %} 9017 9018 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9019 match(Set dst (OrV (Binary dst src2) mask)); 9020 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9021 ins_encode %{ 9022 int vlen_enc = vector_length_encoding(this); 9023 BasicType bt = Matcher::vector_element_basic_type(this); 9024 int opc = this->ideal_Opcode(); 9025 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9026 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9027 %} 9028 ins_pipe( pipe_slow ); 9029 %} 9030 9031 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9032 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9033 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9034 ins_encode %{ 9035 int vlen_enc = vector_length_encoding(this); 9036 BasicType bt = Matcher::vector_element_basic_type(this); 9037 int opc = this->ideal_Opcode(); 9038 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9039 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9040 %} 9041 ins_pipe( pipe_slow ); 9042 %} 9043 9044 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9045 match(Set dst (AndV (Binary dst src2) mask)); 9046 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9047 ins_encode %{ 9048 int vlen_enc = vector_length_encoding(this); 9049 BasicType bt = Matcher::vector_element_basic_type(this); 9050 int opc = this->ideal_Opcode(); 9051 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9052 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9053 %} 9054 ins_pipe( pipe_slow ); 9055 %} 9056 9057 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9058 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9059 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9060 ins_encode %{ 9061 int vlen_enc = vector_length_encoding(this); 9062 BasicType bt = Matcher::vector_element_basic_type(this); 9063 int opc = this->ideal_Opcode(); 9064 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9065 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9066 %} 9067 ins_pipe( pipe_slow ); 9068 %} 9069 9070 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9071 match(Set dst (SubVB (Binary dst src2) mask)); 9072 match(Set dst (SubVS (Binary dst src2) mask)); 9073 match(Set dst (SubVI (Binary dst src2) mask)); 9074 match(Set dst (SubVL (Binary dst src2) mask)); 9075 match(Set dst (SubVF (Binary dst src2) mask)); 9076 match(Set dst (SubVD (Binary dst src2) mask)); 9077 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9078 ins_encode %{ 9079 int vlen_enc = vector_length_encoding(this); 9080 BasicType bt = Matcher::vector_element_basic_type(this); 9081 int opc = this->ideal_Opcode(); 9082 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9083 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9084 %} 9085 ins_pipe( pipe_slow ); 9086 %} 9087 9088 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9089 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9090 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9091 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9092 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9093 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9094 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9095 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9096 ins_encode %{ 9097 int vlen_enc = vector_length_encoding(this); 9098 BasicType bt = Matcher::vector_element_basic_type(this); 9099 int opc = this->ideal_Opcode(); 9100 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9101 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9102 %} 9103 ins_pipe( pipe_slow ); 9104 %} 9105 9106 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9107 match(Set dst (MulVS (Binary dst src2) mask)); 9108 match(Set dst (MulVI (Binary dst src2) mask)); 9109 match(Set dst (MulVL (Binary dst src2) mask)); 9110 match(Set dst (MulVF (Binary dst src2) mask)); 9111 match(Set dst (MulVD (Binary dst src2) mask)); 9112 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9113 ins_encode %{ 9114 int vlen_enc = vector_length_encoding(this); 9115 BasicType bt = Matcher::vector_element_basic_type(this); 9116 int opc = this->ideal_Opcode(); 9117 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9118 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9119 %} 9120 ins_pipe( pipe_slow ); 9121 %} 9122 9123 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9124 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9125 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9126 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9127 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9128 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9129 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9130 ins_encode %{ 9131 int vlen_enc = vector_length_encoding(this); 9132 BasicType bt = Matcher::vector_element_basic_type(this); 9133 int opc = this->ideal_Opcode(); 9134 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9135 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9136 %} 9137 ins_pipe( pipe_slow ); 9138 %} 9139 9140 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9141 match(Set dst (SqrtVF dst mask)); 9142 match(Set dst (SqrtVD dst mask)); 9143 ins_cost(100); 9144 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9145 ins_encode %{ 9146 int vlen_enc = vector_length_encoding(this); 9147 BasicType bt = Matcher::vector_element_basic_type(this); 9148 int opc = this->ideal_Opcode(); 9149 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9150 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9151 %} 9152 ins_pipe( pipe_slow ); 9153 %} 9154 9155 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9156 match(Set dst (DivVF (Binary dst src2) mask)); 9157 match(Set dst (DivVD (Binary dst src2) mask)); 9158 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9159 ins_encode %{ 9160 int vlen_enc = vector_length_encoding(this); 9161 BasicType bt = Matcher::vector_element_basic_type(this); 9162 int opc = this->ideal_Opcode(); 9163 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9164 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9165 %} 9166 ins_pipe( pipe_slow ); 9167 %} 9168 9169 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9170 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9171 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9172 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9173 ins_encode %{ 9174 int vlen_enc = vector_length_encoding(this); 9175 BasicType bt = Matcher::vector_element_basic_type(this); 9176 int opc = this->ideal_Opcode(); 9177 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9178 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9179 %} 9180 ins_pipe( pipe_slow ); 9181 %} 9182 9183 9184 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9185 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9186 match(Set dst (RotateRightV (Binary dst shift) mask)); 9187 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9188 ins_encode %{ 9189 int vlen_enc = vector_length_encoding(this); 9190 BasicType bt = Matcher::vector_element_basic_type(this); 9191 int opc = this->ideal_Opcode(); 9192 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9193 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9194 %} 9195 ins_pipe( pipe_slow ); 9196 %} 9197 9198 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9199 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9200 match(Set dst (RotateRightV (Binary dst src2) mask)); 9201 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9202 ins_encode %{ 9203 int vlen_enc = vector_length_encoding(this); 9204 BasicType bt = Matcher::vector_element_basic_type(this); 9205 int opc = this->ideal_Opcode(); 9206 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9207 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9208 %} 9209 ins_pipe( pipe_slow ); 9210 %} 9211 9212 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9213 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9214 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9215 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9216 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9217 ins_encode %{ 9218 int vlen_enc = vector_length_encoding(this); 9219 BasicType bt = Matcher::vector_element_basic_type(this); 9220 int opc = this->ideal_Opcode(); 9221 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9222 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9223 %} 9224 ins_pipe( pipe_slow ); 9225 %} 9226 9227 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9228 predicate(!n->as_ShiftV()->is_var_shift()); 9229 match(Set dst (LShiftVS (Binary dst src2) mask)); 9230 match(Set dst (LShiftVI (Binary dst src2) mask)); 9231 match(Set dst (LShiftVL (Binary dst src2) mask)); 9232 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9233 ins_encode %{ 9234 int vlen_enc = vector_length_encoding(this); 9235 BasicType bt = Matcher::vector_element_basic_type(this); 9236 int opc = this->ideal_Opcode(); 9237 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9238 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9239 %} 9240 ins_pipe( pipe_slow ); 9241 %} 9242 9243 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9244 predicate(n->as_ShiftV()->is_var_shift()); 9245 match(Set dst (LShiftVS (Binary dst src2) mask)); 9246 match(Set dst (LShiftVI (Binary dst src2) mask)); 9247 match(Set dst (LShiftVL (Binary dst src2) mask)); 9248 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9249 ins_encode %{ 9250 int vlen_enc = vector_length_encoding(this); 9251 BasicType bt = Matcher::vector_element_basic_type(this); 9252 int opc = this->ideal_Opcode(); 9253 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9254 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9255 %} 9256 ins_pipe( pipe_slow ); 9257 %} 9258 9259 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9260 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9261 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9262 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9263 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9264 ins_encode %{ 9265 int vlen_enc = vector_length_encoding(this); 9266 BasicType bt = Matcher::vector_element_basic_type(this); 9267 int opc = this->ideal_Opcode(); 9268 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9269 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9270 %} 9271 ins_pipe( pipe_slow ); 9272 %} 9273 9274 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9275 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9276 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9277 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9278 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! 
rshift masked operation" %} 9279 ins_encode %{ 9280 int vlen_enc = vector_length_encoding(this); 9281 BasicType bt = Matcher::vector_element_basic_type(this); 9282 int opc = this->ideal_Opcode(); 9283 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9284 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9285 %} 9286 ins_pipe( pipe_slow ); 9287 %} 9288 9289 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9290 predicate(!n->as_ShiftV()->is_var_shift()); 9291 match(Set dst (RShiftVS (Binary dst src2) mask)); 9292 match(Set dst (RShiftVI (Binary dst src2) mask)); 9293 match(Set dst (RShiftVL (Binary dst src2) mask)); 9294 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9295 ins_encode %{ 9296 int vlen_enc = vector_length_encoding(this); 9297 BasicType bt = Matcher::vector_element_basic_type(this); 9298 int opc = this->ideal_Opcode(); 9299 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9300 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9301 %} 9302 ins_pipe( pipe_slow ); 9303 %} 9304 9305 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9306 predicate(n->as_ShiftV()->is_var_shift()); 9307 match(Set dst (RShiftVS (Binary dst src2) mask)); 9308 match(Set dst (RShiftVI (Binary dst src2) mask)); 9309 match(Set dst (RShiftVL (Binary dst src2) mask)); 9310 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9311 ins_encode %{ 9312 int vlen_enc = vector_length_encoding(this); 9313 BasicType bt = Matcher::vector_element_basic_type(this); 9314 int opc = this->ideal_Opcode(); 9315 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9316 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9317 %} 9318 ins_pipe( pipe_slow ); 9319 %} 9320 9321 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9322 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9323 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9324 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9325 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9326 ins_encode %{ 9327 int vlen_enc = vector_length_encoding(this); 9328 BasicType bt = Matcher::vector_element_basic_type(this); 9329 int opc = this->ideal_Opcode(); 9330 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9331 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9332 %} 9333 ins_pipe( pipe_slow ); 9334 %} 9335 9336 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9337 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9338 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9339 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9340 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 9341 ins_encode %{ 9342 int vlen_enc = vector_length_encoding(this); 9343 BasicType bt = Matcher::vector_element_basic_type(this); 9344 int opc = this->ideal_Opcode(); 9345 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9346 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9347 %} 9348 ins_pipe( pipe_slow ); 9349 %} 9350 9351 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9352 predicate(!n->as_ShiftV()->is_var_shift()); 9353 match(Set dst (URShiftVS (Binary dst src2) mask)); 9354 match(Set dst (URShiftVI (Binary dst src2) mask)); 9355 match(Set dst (URShiftVL (Binary dst src2) mask)); 9356 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9357 ins_encode %{ 9358 int vlen_enc = vector_length_encoding(this); 9359 BasicType bt = Matcher::vector_element_basic_type(this); 9360 int opc = this->ideal_Opcode(); 9361 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9362 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9363 %} 9364 ins_pipe( pipe_slow ); 9365 %} 9366 9367 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9368 predicate(n->as_ShiftV()->is_var_shift()); 9369 match(Set dst (URShiftVS (Binary dst src2) mask)); 9370 match(Set dst (URShiftVI (Binary dst src2) mask)); 9371 match(Set dst (URShiftVL (Binary dst src2) mask)); 9372 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9373 ins_encode %{ 9374 int vlen_enc = vector_length_encoding(this); 9375 BasicType bt = Matcher::vector_element_basic_type(this); 9376 int opc = this->ideal_Opcode(); 9377 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9378 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9379 %} 9380 ins_pipe( pipe_slow ); 9381 %} 9382 9383 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9384 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9385 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9386 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9387 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9388 ins_encode %{ 9389 int vlen_enc = vector_length_encoding(this); 9390 BasicType bt = Matcher::vector_element_basic_type(this); 9391 int opc = this->ideal_Opcode(); 9392 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9393 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9394 %} 9395 ins_pipe( pipe_slow ); 9396 %} 9397 9398 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9399 match(Set dst (MaxV (Binary dst src2) mask)); 9400 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9401 ins_encode %{ 9402 int vlen_enc = vector_length_encoding(this); 9403 BasicType bt = Matcher::vector_element_basic_type(this); 9404 int opc = this->ideal_Opcode(); 9405 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9406 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9407 %} 9408 ins_pipe( pipe_slow ); 9409 %} 9410 9411 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9412 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9413 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9414 ins_encode %{ 9415 int vlen_enc = vector_length_encoding(this); 9416 BasicType bt = Matcher::vector_element_basic_type(this); 9417 int opc = this->ideal_Opcode(); 9418 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9419 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9420 %} 9421 ins_pipe( pipe_slow ); 9422 %} 9423 9424 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9425 match(Set dst (MinV (Binary dst src2) mask)); 9426 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9427 ins_encode %{ 9428 int vlen_enc = vector_length_encoding(this); 9429 BasicType bt = Matcher::vector_element_basic_type(this); 9430 int opc = this->ideal_Opcode(); 9431 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9432 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9433 %} 9434 ins_pipe( pipe_slow ); 9435 %} 9436 9437 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9438 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9439 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9440 ins_encode %{ 9441 int vlen_enc = vector_length_encoding(this); 9442 BasicType bt = Matcher::vector_element_basic_type(this); 9443 int opc = this->ideal_Opcode(); 9444 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9445 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9446 %} 9447 ins_pipe( pipe_slow ); 9448 %} 9449 9450 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9451 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9452 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9453 ins_encode %{ 9454 int vlen_enc = vector_length_encoding(this); 9455 BasicType bt = Matcher::vector_element_basic_type(this); 9456 int opc = this->ideal_Opcode(); 9457 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9458 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9459 %} 9460 ins_pipe( pipe_slow ); 9461 %} 9462 9463 instruct vabs_masked(vec dst, kReg mask) %{ 9464 match(Set dst (AbsVB dst mask)); 9465 match(Set dst (AbsVS dst mask)); 9466 match(Set dst (AbsVI dst mask)); 9467 match(Set dst (AbsVL dst mask)); 9468 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9469 ins_cost(100); 9470 ins_encode %{ 9471 int vlen_enc = vector_length_encoding(this); 9472 BasicType bt = Matcher::vector_element_basic_type(this); 9473 int opc = this->ideal_Opcode(); 9474 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9475 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9476 %} 9477 ins_pipe( pipe_slow ); 9478 %} 9479 9480 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9481 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9482 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9483 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9484 ins_encode %{ 9485 int vlen_enc = vector_length_encoding(this); 9486 BasicType bt = Matcher::vector_element_basic_type(this); 9487 int opc = this->ideal_Opcode(); 9488 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9489 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9490 %} 9491 ins_pipe( pipe_slow ); 9492 %} 9493 9494 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9495 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9496 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9497 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9498 ins_encode %{ 9499 int vlen_enc = vector_length_encoding(this); 9500 BasicType bt = Matcher::vector_element_basic_type(this); 9501 int opc = this->ideal_Opcode(); 9502 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9503 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9504 %} 9505 ins_pipe( pipe_slow ); 9506 %} 9507 9508 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{ 9509 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 9510 effect(TEMP scratch); 9511 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! using $scratch as TEMP" %} 9512 ins_encode %{ 9513 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 9514 int vlen_enc = vector_length_encoding(this, $src1); 9515 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 9516 9517 // Comparison i 9518 switch (src1_elem_bt) { 9519 case T_BYTE: { 9520 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9521 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9522 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9523 break; 9524 } 9525 case T_SHORT: { 9526 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9527 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9528 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9529 break; 9530 } 9531 case T_INT: { 9532 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9533 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9534 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9535 break; 9536 } 9537 case T_LONG: { 9538 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9539 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9540 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9541 break; 9542 } 9543 case T_FLOAT: { 9544 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9545 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9546 break; 9547 } 9548 case T_DOUBLE: { 9549 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9550 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9551 break; 9552 } 9553 default: assert(false, "%s", type2name(src1_elem_bt)); break; 9554 } 9555 %} 9556 ins_pipe( pipe_slow ); 9557 %} 9558 9559 instruct mask_all_evexI_LE32(kReg dst, rRegI src) 
%{ 9560 predicate(Matcher::vector_length(n) <= 32); 9561 match(Set dst (MaskAll src)); 9562 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 9563 ins_encode %{ 9564 int mask_len = Matcher::vector_length(this); 9565 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 9566 %} 9567 ins_pipe( pipe_slow ); 9568 %} 9569 9570 #ifdef _LP64 9571 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 9572 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 9573 match(Set dst (XorVMask src (MaskAll cnt))); 9574 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 9575 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 9576 ins_encode %{ 9577 uint masklen = Matcher::vector_length(this); 9578 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 9579 %} 9580 ins_pipe( pipe_slow ); 9581 %} 9582 9583 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 9584 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 9585 (Matcher::vector_length(n) == 16) || 9586 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 9587 match(Set dst (XorVMask src (MaskAll cnt))); 9588 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 9589 ins_encode %{ 9590 uint masklen = Matcher::vector_length(this); 9591 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 9592 %} 9593 ins_pipe( pipe_slow ); 9594 %} 9595 9596 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 9597 predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8); 9598 match(Set dst (VectorLongToMask src)); 9599 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 9600 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 9601 ins_encode %{ 9602 int mask_len = Matcher::vector_length(this); 9603 int vec_enc = vector_length_encoding(mask_len); 9604 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 9605 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 9606 %} 9607 ins_pipe( pipe_slow ); 9608 %} 9609 9610 9611 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 9612 predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8); 9613 match(Set dst (VectorLongToMask src)); 9614 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 9615 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 9616 ins_encode %{ 9617 int mask_len = Matcher::vector_length(this); 9618 assert(mask_len <= 32, "invalid mask length"); 9619 int vec_enc = vector_length_encoding(mask_len); 9620 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 9621 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 9622 %} 9623 ins_pipe( pipe_slow ); 9624 %} 9625 9626 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 9627 predicate(n->bottom_type()->isa_vectmask()); 9628 match(Set dst (VectorLongToMask src)); 9629 format %{ "long_to_mask_evex $dst, $src\t!" %} 9630 ins_encode %{ 9631 __ kmov($dst$$KRegister, $src$$Register); 9632 %} 9633 ins_pipe( pipe_slow ); 9634 %} 9635 #endif 9636 9637 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 9638 match(Set dst (AndVMask src1 src2)); 9639 match(Set dst (OrVMask src1 src2)); 9640 match(Set dst (XorVMask src1 src2)); 9641 effect(TEMP kscratch); 9642 format %{ "mask_opers_evex $dst, $src1, $src2\t! 
using $kscratch as TEMP" %} 9643 ins_encode %{ 9644 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 9645 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 9646 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 9647 uint masklen = Matcher::vector_length(this); 9648 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 9649 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 9650 %} 9651 ins_pipe( pipe_slow ); 9652 %} 9653 9654 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 9655 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 9656 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 9657 ins_encode %{ 9658 int vlen_enc = vector_length_encoding(this); 9659 BasicType bt = Matcher::vector_element_basic_type(this); 9660 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 9661 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 9662 %} 9663 ins_pipe( pipe_slow ); 9664 %} 9665 9666 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 9667 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 9668 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 9669 ins_encode %{ 9670 int vlen_enc = vector_length_encoding(this); 9671 BasicType bt = Matcher::vector_element_basic_type(this); 9672 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 9673 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 9674 %} 9675 ins_pipe( pipe_slow ); 9676 %} 9677 9678 instruct castMM(kReg dst) 9679 %{ 9680 match(Set dst (CastVV dst)); 9681 9682 size(0); 9683 format %{ "# castVV of $dst" %} 9684 ins_encode(/* empty encoding */); 9685 ins_cost(0); 9686 ins_pipe(empty); 9687 %} 9688 9689 instruct castVV(vec dst) 9690 %{ 9691 match(Set dst (CastVV dst)); 9692 9693 size(0); 9694 format %{ "# castVV of $dst" %} 9695 ins_encode(/* empty encoding */); 9696 ins_cost(0); 9697 ins_pipe(empty); 9698 %} 9699 9700 instruct castVVLeg(legVec dst) 9701 %{ 9702 match(Set dst (CastVV dst)); 9703 9704 size(0); 9705 format %{ "# castVV of $dst" %} 9706 ins_encode(/* empty encoding */); 9707 ins_cost(0); 9708 ins_pipe(empty); 9709 %}
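
// ---------------------------------------------------------------------------------------------
// Illustrative reference for the masked instructs above (a sketch of the assumed per-lane
// semantics, not generated code): the evmasked_op() calls use EVEX masking, and with
// merge == true a lane whose mask bit is clear keeps its previous destination value rather
// than being zeroed. In pseudo-C++ form, with op standing for add, sub, mul, min, max,
// shift, etc.:
//
//   // $dst also appears as an input in the match rules above because unselected lanes
//   // are preserved under merge-masking.
//   for (int i = 0; i < lanes; i++) {
//     dst[i] = mask[i] ? op(dst[i], src2[i])  // selected lane: apply the operation
//                      : dst[i];              // merge-masking: lane left unchanged
//   }
//
// The few calls that pass merge == false (e.g. the masked rearrange) instead appear to use
// zeroing semantics, clearing the unselected lanes.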