//
// Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers. 512-bit registers of 16 words each, labeled (a)-(p).
// Word a in each register holds a Float, words ab hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperWord flags).
// For pre-EVEX enabled architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
//
// Linux ABI:   No register preserved across function calls
//              XMM0-XMM7 might hold parameters
// Windows ABI: XMM6-XMM31 preserved across function calls
//              XMM0-XMM3 might hold parameters

reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));

reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));

reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));

reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));

reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));

reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));

reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));

reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));

#ifdef _LP64

reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));

reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));

reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));

reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));

reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));

reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));

reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));

reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));

reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));

reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));

reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));

reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));

reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));

reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));

reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));

reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));

reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));

reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));

reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));

reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));

reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));

reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));

reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));

reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));

#endif // _LP64

#ifdef _LP64
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
#else
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64

// AVX3 Mask Registers.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());

reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());

reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());

reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());

reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());

reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());

reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());


alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
                   XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
                   XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
                   XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
                   XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
                   XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
                   XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
                   XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
#ifdef _LP64
                  ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
                   XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
                  ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
                   XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
                   XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
                   XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
                   XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
                   XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
                   XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
                   XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
                   XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
                   XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
                   XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
                   XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
                   XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
                   XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
                   XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
                   XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
#endif
                   );

alloc_class chunk2(K7, K7_H,
                   K6, K6_H,
                   K5, K5_H,
                   K4, K4_H,
                   K3, K3_H,
                   K2, K2_H,
                   K1, K1_H);

reg_class vectmask_reg(K1, K1_H,
                       K2, K2_H,
                       K3, K3_H,
                       K4, K4_H,
                       K5, K5_H,
                       K6, K6_H,
                       K7, K7_H);

reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);

// flags allocation class should be last.
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
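// Note (as a rough guide): ADLC copies source_hpp %{ %} blocks into the generated ad
// header and source %{ %} blocks into the generated ad .cpp, so declarations that other
// HotSpot code relies on, such as HandlerImpl below (whose handler sizes are consulted
// from output.cpp), are kept in source_hpp.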
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // Number of relocations needed by a call trampoline stub. 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(CodeBuffer &cbuf); 1191 static int emit_deopt_handler(CodeBuffer& cbuf); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // exception handler starts out as a jump and can be patched to 1196 // a call by deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5-byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // exception handler starts out as a jump and can be patched to 1211 // a call by deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch(bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_vector_popcount_predicate(BasicType bt) { 1245 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1246 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1247 } 1248 1249 static inline bool is_unsigned_booltest_pred(int bt) { 1250 return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare); 1251 } 1252 1253 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) { 1254 return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() && 1255 (VM_Version::supports_avx512vl() || vlen_bytes == 64); 1256 } 1257 1258 class Node::PD { 1259 public: 1260 enum NodeFlags { 1261 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1262 _last_flag = Flag_intel_jcc_erratum 1263 }; 1264 }; 1265 1266 %} // end source_hpp 1267 1268 source %{ 1269 1270 #include "opto/addnode.hpp" 1271 #include "c2_intelJccErratum_x86.hpp" 1272 1273 void
PhaseOutput::pd_perform_mach_node_analysis() { 1274 if (VM_Version::has_intel_jcc_erratum()) { 1275 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1276 _buf_sizes._code += extra_padding; 1277 } 1278 } 1279 1280 int MachNode::pd_alignment_required() const { 1281 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1282 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 1283 return IntelJccErratum::largest_jcc_size() + 1; 1284 } else { 1285 return 1; 1286 } 1287 } 1288 1289 int MachNode::compute_padding(int current_offset) const { 1290 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1291 Compile* C = Compile::current(); 1292 PhaseOutput* output = C->output(); 1293 Block* block = output->block(); 1294 int index = output->index(); 1295 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1296 } else { 1297 return 0; 1298 } 1299 } 1300 1301 // Emit exception handler code. 1302 // Stuff framesize into a register and call a VM stub routine. 1303 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1304 1305 // Note that the code buffer's insts_mark is always relative to insts. 1306 // That's why we must use the macroassembler to generate a handler. 1307 C2_MacroAssembler _masm(&cbuf); 1308 address base = __ start_a_stub(size_exception_handler()); 1309 if (base == NULL) { 1310 ciEnv::current()->record_failure("CodeCache is full"); 1311 return 0; // CodeBuffer::expand failed 1312 } 1313 int offset = __ offset(); 1314 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1315 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1316 __ end_a_stub(); 1317 return offset; 1318 } 1319 1320 // Emit deopt handler code. 1321 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1322 1323 // Note that the code buffer's insts_mark is always relative to insts. 1324 // That's why we must use the macroassembler to generate a handler. 1325 C2_MacroAssembler _masm(&cbuf); 1326 address base = __ start_a_stub(size_deopt_handler()); 1327 if (base == NULL) { 1328 ciEnv::current()->record_failure("CodeCache is full"); 1329 return 0; // CodeBuffer::expand failed 1330 } 1331 int offset = __ offset(); 1332 1333 #ifdef _LP64 1334 address the_pc = (address) __ pc(); 1335 Label next; 1336 // push a "the_pc" on the stack without destroying any registers 1337 // as they all may be live. 
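// In short: the call below pushes the address of the "next" label as its return address.
// That address lies a few bytes past "the_pc", so the subptr that follows subtracts the
// number of bytes emitted in between, leaving exactly "the_pc" in the stack slot.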
1338 1339 // push address of "next" 1340 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1341 __ bind(next); 1342 // adjust it so it matches "the_pc" 1343 __ subptr(Address(rsp, 0), __ offset() - offset); 1344 #else 1345 InternalAddress here(__ pc()); 1346 __ pushptr(here.addr(), noreg); 1347 #endif 1348 1349 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1350 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1351 __ end_a_stub(); 1352 return offset; 1353 } 1354 1355 Assembler::Width widthForType(BasicType bt) { 1356 if (bt == T_BYTE) { 1357 return Assembler::B; 1358 } else if (bt == T_SHORT) { 1359 return Assembler::W; 1360 } else if (bt == T_INT) { 1361 return Assembler::D; 1362 } else { 1363 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1364 return Assembler::Q; 1365 } 1366 } 1367 1368 //============================================================================= 1369 1370 // Float masks come from different places depending on platform. 1371 #ifdef _LP64 1372 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1373 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1374 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1375 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1376 #else 1377 static address float_signmask() { return (address)float_signmask_pool; } 1378 static address float_signflip() { return (address)float_signflip_pool; } 1379 static address double_signmask() { return (address)double_signmask_pool; } 1380 static address double_signflip() { return (address)double_signflip_pool; } 1381 #endif 1382 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1383 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1384 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1385 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1386 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1387 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1388 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1389 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1390 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1391 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1392 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1393 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1394 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1395 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1396 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1397 1398 //============================================================================= 1399 const bool Matcher::match_rule_supported(int opcode) { 1400 if (!has_match_rule(opcode)) { 1401 return false; // no match rule present 1402 } 1403 const bool is_LP64 = 
LP64_ONLY(true) NOT_LP64(false); 1404 switch (opcode) { 1405 case Op_AbsVL: 1406 case Op_StoreVectorScatter: 1407 if (UseAVX < 3) { 1408 return false; 1409 } 1410 break; 1411 case Op_PopCountI: 1412 case Op_PopCountL: 1413 if (!UsePopCountInstruction) { 1414 return false; 1415 } 1416 break; 1417 case Op_PopCountVI: 1418 if (UseAVX < 2) { 1419 return false; 1420 } 1421 break; 1422 case Op_PopCountVL: 1423 if (UseAVX < 2) { 1424 return false; 1425 } 1426 break; 1427 case Op_MulVI: 1428 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1429 return false; 1430 } 1431 break; 1432 case Op_MulVL: 1433 if (UseSSE < 4) { // only with SSE4_1 or AVX 1434 return false; 1435 } 1436 break; 1437 case Op_MulReductionVL: 1438 if (VM_Version::supports_avx512dq() == false) { 1439 return false; 1440 } 1441 break; 1442 case Op_AddReductionVL: 1443 if (UseSSE < 2) { // requires at least SSE2 1444 return false; 1445 } 1446 break; 1447 case Op_AbsVB: 1448 case Op_AbsVS: 1449 case Op_AbsVI: 1450 case Op_AddReductionVI: 1451 case Op_AndReductionV: 1452 case Op_OrReductionV: 1453 case Op_XorReductionV: 1454 if (UseSSE < 3) { // requires at least SSSE3 1455 return false; 1456 } 1457 break; 1458 case Op_VectorLoadShuffle: 1459 case Op_VectorRearrange: 1460 case Op_MulReductionVI: 1461 if (UseSSE < 4) { // requires at least SSE4 1462 return false; 1463 } 1464 break; 1465 case Op_IsInfiniteF: 1466 case Op_IsInfiniteD: 1467 if (!VM_Version::supports_avx512dq()) { 1468 return false; 1469 } 1470 break; 1471 case Op_SqrtVD: 1472 case Op_SqrtVF: 1473 case Op_VectorMaskCmp: 1474 case Op_VectorCastB2X: 1475 case Op_VectorCastS2X: 1476 case Op_VectorCastI2X: 1477 case Op_VectorCastL2X: 1478 case Op_VectorCastF2X: 1479 case Op_VectorCastD2X: 1480 case Op_VectorUCastB2X: 1481 case Op_VectorUCastS2X: 1482 case Op_VectorUCastI2X: 1483 case Op_VectorMaskCast: 1484 if (UseAVX < 1) { // enabled for AVX only 1485 return false; 1486 } 1487 break; 1488 case Op_PopulateIndex: 1489 if (!is_LP64 || (UseAVX < 2)) { 1490 return false; 1491 } 1492 break; 1493 case Op_RoundVF: 1494 if (UseAVX < 2) { // enabled for AVX2 only 1495 return false; 1496 } 1497 break; 1498 case Op_RoundVD: 1499 if (UseAVX < 3) { 1500 return false; // enabled for AVX3 only 1501 } 1502 break; 1503 case Op_CompareAndSwapL: 1504 #ifdef _LP64 1505 case Op_CompareAndSwapP: 1506 #endif 1507 if (!VM_Version::supports_cx8()) { 1508 return false; 1509 } 1510 break; 1511 case Op_CMoveVF: 1512 case Op_CMoveVD: 1513 if (UseAVX < 1) { // enabled for AVX only 1514 return false; 1515 } 1516 break; 1517 case Op_StrIndexOf: 1518 if (!UseSSE42Intrinsics) { 1519 return false; 1520 } 1521 break; 1522 case Op_StrIndexOfChar: 1523 if (!UseSSE42Intrinsics) { 1524 return false; 1525 } 1526 break; 1527 case Op_OnSpinWait: 1528 if (VM_Version::supports_on_spin_wait() == false) { 1529 return false; 1530 } 1531 break; 1532 case Op_MulVB: 1533 case Op_LShiftVB: 1534 case Op_RShiftVB: 1535 case Op_URShiftVB: 1536 case Op_VectorInsert: 1537 case Op_VectorLoadMask: 1538 case Op_VectorStoreMask: 1539 case Op_VectorBlend: 1540 if (UseSSE < 4) { 1541 return false; 1542 } 1543 break; 1544 #ifdef _LP64 1545 case Op_MaxD: 1546 case Op_MaxF: 1547 case Op_MinD: 1548 case Op_MinF: 1549 if (UseAVX < 1) { // enabled for AVX only 1550 return false; 1551 } 1552 break; 1553 #endif 1554 case Op_CacheWB: 1555 case Op_CacheWBPreSync: 1556 case Op_CacheWBPostSync: 1557 if (!VM_Version::supports_data_cache_line_flush()) { 1558 return false; 1559 } 1560 break; 1561 case Op_ExtractB: 1562 case Op_ExtractL: 
1563 case Op_ExtractI: 1564 case Op_RoundDoubleMode: 1565 if (UseSSE < 4) { 1566 return false; 1567 } 1568 break; 1569 case Op_RoundDoubleModeV: 1570 if (VM_Version::supports_avx() == false) { 1571 return false; // 128bit vroundpd is not available 1572 } 1573 break; 1574 case Op_LoadVectorGather: 1575 if (UseAVX < 2) { 1576 return false; 1577 } 1578 break; 1579 case Op_FmaVD: 1580 case Op_FmaVF: 1581 if (!UseFMA) { 1582 return false; 1583 } 1584 break; 1585 case Op_MacroLogicV: 1586 if (UseAVX < 3 || !UseVectorMacroLogic) { 1587 return false; 1588 } 1589 break; 1590 1591 case Op_VectorCmpMasked: 1592 case Op_VectorMaskGen: 1593 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1594 return false; 1595 } 1596 break; 1597 case Op_VectorMaskFirstTrue: 1598 case Op_VectorMaskLastTrue: 1599 case Op_VectorMaskTrueCount: 1600 case Op_VectorMaskToLong: 1601 if (!is_LP64 || UseAVX < 1) { 1602 return false; 1603 } 1604 break; 1605 case Op_RoundF: 1606 case Op_RoundD: 1607 if (!is_LP64) { 1608 return false; 1609 } 1610 break; 1611 case Op_CopySignD: 1612 case Op_CopySignF: 1613 if (UseAVX < 3 || !is_LP64) { 1614 return false; 1615 } 1616 if (!VM_Version::supports_avx512vl()) { 1617 return false; 1618 } 1619 break; 1620 #ifndef _LP64 1621 case Op_AddReductionVF: 1622 case Op_AddReductionVD: 1623 case Op_MulReductionVF: 1624 case Op_MulReductionVD: 1625 if (UseSSE < 1) { // requires at least SSE 1626 return false; 1627 } 1628 break; 1629 case Op_MulAddVS2VI: 1630 case Op_RShiftVL: 1631 case Op_AbsVD: 1632 case Op_NegVD: 1633 if (UseSSE < 2) { 1634 return false; 1635 } 1636 break; 1637 #endif // !LP64 1638 case Op_CompressBits: 1639 if (!VM_Version::supports_bmi2() || (!is_LP64 && UseSSE < 2)) { 1640 return false; 1641 } 1642 break; 1643 case Op_ExpandBits: 1644 if (!VM_Version::supports_bmi2() || (!is_LP64 && (UseSSE < 2 || !VM_Version::supports_bmi1()))) { 1645 return false; 1646 } 1647 break; 1648 case Op_SignumF: 1649 if (UseSSE < 1) { 1650 return false; 1651 } 1652 break; 1653 case Op_SignumD: 1654 if (UseSSE < 2) { 1655 return false; 1656 } 1657 break; 1658 case Op_CompressM: 1659 if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) { 1660 return false; 1661 } 1662 break; 1663 case Op_CompressV: 1664 case Op_ExpandV: 1665 if (!VM_Version::supports_avx512vl()) { 1666 return false; 1667 } 1668 break; 1669 case Op_SqrtF: 1670 if (UseSSE < 1) { 1671 return false; 1672 } 1673 break; 1674 case Op_SqrtD: 1675 #ifdef _LP64 1676 if (UseSSE < 2) { 1677 return false; 1678 } 1679 #else 1680 // x86_32.ad has a special match rule for SqrtD. 1681 // Together with common x86 rules, this handles all UseSSE cases. 1682 #endif 1683 break; 1684 case Op_ConvF2HF: 1685 case Op_ConvHF2F: 1686 if (!VM_Version::supports_f16c() && !VM_Version::supports_avx512vl()) { 1687 return false; 1688 } 1689 break; 1690 case Op_VectorCastF2HF: 1691 case Op_VectorCastHF2F: 1692 if (!VM_Version::supports_f16c() && !VM_Version::supports_evex()) { 1693 return false; 1694 } 1695 break; 1696 } 1697 return true; // Match rules are supported by default. 
1698 } 1699 1700 //------------------------------------------------------------------------ 1701 1702 static inline bool is_pop_count_instr_target(BasicType bt) { 1703 return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) || 1704 (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq()); 1705 } 1706 1707 const bool Matcher::match_rule_supported_superword(int opcode, int vlen, BasicType bt) { 1708 return match_rule_supported_vector(opcode, vlen, bt); 1709 } 1710 1711 // Identify extra cases that we might want to provide match rules for vector nodes and 1712 // other intrinsics guarded with vector length (vlen) and element type (bt). 1713 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1714 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1715 if (!match_rule_supported(opcode)) { 1716 return false; 1717 } 1718 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1719 // * SSE2 supports 128bit vectors for all types; 1720 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1721 // * AVX2 supports 256bit vectors for all types; 1722 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1723 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1724 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1725 // And MaxVectorSize is taken into account as well. 1726 if (!vector_size_supported(bt, vlen)) { 1727 return false; 1728 } 1729 // Special cases which require vector length follow: 1730 // * implementation limitations 1731 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1732 // * 128bit vroundpd instruction is present only in AVX1 1733 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1734 switch (opcode) { 1735 case Op_AbsVF: 1736 case Op_NegVF: 1737 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1738 return false; // 512bit vandps and vxorps are not available 1739 } 1740 break; 1741 case Op_AbsVD: 1742 case Op_NegVD: 1743 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1744 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1745 } 1746 break; 1747 case Op_CMoveVF: 1748 if (vlen != 8) { 1749 return false; // implementation limitation (only vcmov8F_reg is present) 1750 } 1751 break; 1752 case Op_RotateRightV: 1753 case Op_RotateLeftV: 1754 if (bt != T_INT && bt != T_LONG) { 1755 return false; 1756 } // fallthrough 1757 case Op_MacroLogicV: 1758 if (!VM_Version::supports_evex() || 1759 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1760 return false; 1761 } 1762 break; 1763 case Op_ClearArray: 1764 case Op_VectorMaskGen: 1765 case Op_VectorCmpMasked: 1766 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1767 return false; 1768 } 1769 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1770 return false; 1771 } 1772 break; 1773 case Op_LoadVectorMasked: 1774 case Op_StoreVectorMasked: 1775 if (!VM_Version::supports_avx512bw() && (is_subword_type(bt) || UseAVX < 1)) { 1776 return false; 1777 } 1778 break; 1779 case Op_CMoveVD: 1780 if (vlen != 4) { 1781 return false; // implementation limitation (only vcmov4D_reg is present) 1782 } 1783 break; 1784 case Op_MaxV: 1785 case Op_MinV: 1786 if (UseSSE < 4 && is_integral_type(bt)) { 1787 return false; 1788 } 1789 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1790 // Float/Double intrinsics are enabled 
for AVX family currently. 1791 if (UseAVX == 0) { 1792 return false; 1793 } 1794 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1795 return false; 1796 } 1797 } 1798 break; 1799 case Op_CallLeafVector: 1800 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1801 return false; 1802 } 1803 break; 1804 case Op_AddReductionVI: 1805 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1806 return false; 1807 } 1808 // fallthrough 1809 case Op_AndReductionV: 1810 case Op_OrReductionV: 1811 case Op_XorReductionV: 1812 if (is_subword_type(bt) && (UseSSE < 4)) { 1813 return false; 1814 } 1815 #ifndef _LP64 1816 if (bt == T_BYTE || bt == T_LONG) { 1817 return false; 1818 } 1819 #endif 1820 break; 1821 #ifndef _LP64 1822 case Op_VectorInsert: 1823 if (bt == T_LONG || bt == T_DOUBLE) { 1824 return false; 1825 } 1826 break; 1827 #endif 1828 case Op_MinReductionV: 1829 case Op_MaxReductionV: 1830 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1831 return false; 1832 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1833 return false; 1834 } 1835 // Float/Double intrinsics enabled for AVX family. 1836 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1837 return false; 1838 } 1839 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1840 return false; 1841 } 1842 #ifndef _LP64 1843 if (bt == T_BYTE || bt == T_LONG) { 1844 return false; 1845 } 1846 #endif 1847 break; 1848 case Op_VectorTest: 1849 if (UseSSE < 4) { 1850 return false; // Implementation limitation 1851 } else if (size_in_bits < 32) { 1852 return false; // Implementation limitation 1853 } 1854 break; 1855 case Op_VectorLoadShuffle: 1856 case Op_VectorRearrange: 1857 if(vlen == 2) { 1858 return false; // Implementation limitation due to how shuffle is loaded 1859 } else if (size_in_bits == 256 && UseAVX < 2) { 1860 return false; // Implementation limitation 1861 } 1862 break; 1863 case Op_VectorLoadMask: 1864 case Op_VectorMaskCast: 1865 if (size_in_bits == 256 && UseAVX < 2) { 1866 return false; // Implementation limitation 1867 } 1868 // fallthrough 1869 case Op_VectorStoreMask: 1870 if (vlen == 2) { 1871 return false; // Implementation limitation 1872 } 1873 break; 1874 case Op_PopulateIndex: 1875 if (size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1876 return false; 1877 } 1878 break; 1879 case Op_VectorCastB2X: 1880 case Op_VectorCastS2X: 1881 case Op_VectorCastI2X: 1882 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1883 return false; 1884 } 1885 break; 1886 case Op_VectorCastL2X: 1887 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1888 return false; 1889 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1890 return false; 1891 } 1892 break; 1893 case Op_VectorCastF2X: { 1894 // As per JLS section 5.1.3 narrowing conversion to sub-word types 1895 // happen after intermediate conversion to integer and special handling 1896 // code needs AVX2 vpcmpeqd instruction for 256 bit vectors. 
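// Example, for illustration: casting a vector of 8 floats to T_BYTE reads a 256-bit
// float source, so src_size_in_bits below is 256 and UseAVX >= 2 is required even
// though the byte result itself fits in 64 bits.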
1897 int src_size_in_bits = type2aelembytes(T_FLOAT) * vlen * BitsPerByte; 1898 if (is_integral_type(bt) && src_size_in_bits == 256 && UseAVX < 2) { 1899 return false; 1900 } 1901 } 1902 // fallthrough 1903 case Op_VectorCastD2X: 1904 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1905 return false; 1906 } 1907 break; 1908 case Op_VectorCastF2HF: 1909 case Op_VectorCastHF2F: 1910 if (!VM_Version::supports_f16c() && 1911 ((!VM_Version::supports_evex() || 1912 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())))) { 1913 return false; 1914 } 1915 break; 1916 case Op_RoundVD: 1917 if (!VM_Version::supports_avx512dq()) { 1918 return false; 1919 } 1920 break; 1921 case Op_MulReductionVI: 1922 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1923 return false; 1924 } 1925 break; 1926 case Op_LoadVectorGatherMasked: 1927 case Op_StoreVectorScatterMasked: 1928 case Op_StoreVectorScatter: 1929 if (is_subword_type(bt)) { 1930 return false; 1931 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1932 return false; 1933 } 1934 // fallthrough 1935 case Op_LoadVectorGather: 1936 if (size_in_bits == 64 ) { 1937 return false; 1938 } 1939 break; 1940 case Op_MaskAll: 1941 if (!VM_Version::supports_evex()) { 1942 return false; 1943 } 1944 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1945 return false; 1946 } 1947 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1948 return false; 1949 } 1950 break; 1951 case Op_VectorMaskCmp: 1952 if (vlen < 2 || size_in_bits < 32) { 1953 return false; 1954 } 1955 break; 1956 case Op_CompressM: 1957 if (UseAVX < 3 || !VM_Version::supports_bmi2()) { 1958 return false; 1959 } 1960 break; 1961 case Op_CompressV: 1962 case Op_ExpandV: 1963 if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) { 1964 return false; 1965 } 1966 if (size_in_bits < 128 ) { 1967 return false; 1968 } 1969 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1970 return false; 1971 } 1972 break; 1973 case Op_VectorLongToMask: 1974 if (UseAVX < 1 || !is_LP64) { 1975 return false; 1976 } 1977 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1978 return false; 1979 } 1980 break; 1981 case Op_SignumVD: 1982 case Op_SignumVF: 1983 if (UseAVX < 1) { 1984 return false; 1985 } 1986 break; 1987 case Op_PopCountVI: 1988 case Op_PopCountVL: { 1989 if (!is_pop_count_instr_target(bt) && 1990 (size_in_bits == 512) && !VM_Version::supports_avx512bw()) { 1991 return false; 1992 } 1993 } 1994 break; 1995 case Op_ReverseV: 1996 case Op_ReverseBytesV: 1997 if (UseAVX < 2) { 1998 return false; 1999 } 2000 break; 2001 case Op_CountTrailingZerosV: 2002 case Op_CountLeadingZerosV: 2003 if (UseAVX < 2) { 2004 return false; 2005 } 2006 break; 2007 } 2008 return true; // Per default match rules are supported. 2009 } 2010 2011 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 2012 // ADLC based match_rule_supported routine checks for the existence of pattern based 2013 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 2014 // of their non-masked counterpart with mask edge being the differentiator. 2015 // This routine does a strict check on the existence of masked operation patterns 2016 // by returning a default false value for all the other opcodes apart from the 2017 // ones whose masked instruction patterns are defined in this file. 
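// Illustrative example: a masked Op_AddVI over 8 T_INT lanes is a 256-bit operation,
// so besides the generic vector checks it must pass the AVX512VL test below; the same
// operation over 16 lanes is 512 bits wide and that particular test does not apply.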
2018 if (!match_rule_supported_vector(opcode, vlen, bt)) { 2019 return false; 2020 } 2021 2022 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 2023 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 2024 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 2025 return false; 2026 } 2027 switch(opcode) { 2028 // Unary masked operations 2029 case Op_AbsVB: 2030 case Op_AbsVS: 2031 if(!VM_Version::supports_avx512bw()) { 2032 return false; // Implementation limitation 2033 } 2034 case Op_AbsVI: 2035 case Op_AbsVL: 2036 return true; 2037 2038 // Ternary masked operations 2039 case Op_FmaVF: 2040 case Op_FmaVD: 2041 return true; 2042 2043 case Op_MacroLogicV: 2044 if(bt != T_INT && bt != T_LONG) { 2045 return false; 2046 } 2047 return true; 2048 2049 // Binary masked operations 2050 case Op_AddVB: 2051 case Op_AddVS: 2052 case Op_SubVB: 2053 case Op_SubVS: 2054 case Op_MulVS: 2055 case Op_LShiftVS: 2056 case Op_RShiftVS: 2057 case Op_URShiftVS: 2058 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2059 if (!VM_Version::supports_avx512bw()) { 2060 return false; // Implementation limitation 2061 } 2062 return true; 2063 2064 case Op_MulVL: 2065 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2066 if (!VM_Version::supports_avx512dq()) { 2067 return false; // Implementation limitation 2068 } 2069 return true; 2070 2071 case Op_AndV: 2072 case Op_OrV: 2073 case Op_XorV: 2074 case Op_RotateRightV: 2075 case Op_RotateLeftV: 2076 if (bt != T_INT && bt != T_LONG) { 2077 return false; // Implementation limitation 2078 } 2079 return true; 2080 2081 case Op_VectorLoadMask: 2082 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 2083 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2084 return false; 2085 } 2086 return true; 2087 2088 case Op_AddVI: 2089 case Op_AddVL: 2090 case Op_AddVF: 2091 case Op_AddVD: 2092 case Op_SubVI: 2093 case Op_SubVL: 2094 case Op_SubVF: 2095 case Op_SubVD: 2096 case Op_MulVI: 2097 case Op_MulVF: 2098 case Op_MulVD: 2099 case Op_DivVF: 2100 case Op_DivVD: 2101 case Op_SqrtVF: 2102 case Op_SqrtVD: 2103 case Op_LShiftVI: 2104 case Op_LShiftVL: 2105 case Op_RShiftVI: 2106 case Op_RShiftVL: 2107 case Op_URShiftVI: 2108 case Op_URShiftVL: 2109 case Op_LoadVectorMasked: 2110 case Op_StoreVectorMasked: 2111 case Op_LoadVectorGatherMasked: 2112 case Op_StoreVectorScatterMasked: 2113 return true; 2114 2115 case Op_MaxV: 2116 case Op_MinV: 2117 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2118 return false; // Implementation limitation 2119 } 2120 if (is_floating_point_type(bt)) { 2121 return false; // Implementation limitation 2122 } 2123 return true; 2124 2125 case Op_VectorMaskCmp: 2126 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2127 return false; // Implementation limitation 2128 } 2129 return true; 2130 2131 case Op_VectorRearrange: 2132 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2133 return false; // Implementation limitation 2134 } 2135 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2136 return false; // Implementation limitation 2137 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2138 return false; // Implementation limitation 2139 } 2140 return true; 2141 2142 // Binary Logical operations 2143 case Op_AndVMask: 2144 case Op_OrVMask: 2145 case Op_XorVMask: 2146 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2147 return false; // Implementation limitation 2148 } 2149 return true; 2150 2151 case 
Op_PopCountVI: 2152 case Op_PopCountVL: 2153 if (!is_pop_count_instr_target(bt)) { 2154 return false; 2155 } 2156 return true; 2157 2158 case Op_MaskAll: 2159 return true; 2160 2161 case Op_CountLeadingZerosV: 2162 if (is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd()) { 2163 return true; 2164 } 2165 default: 2166 return false; 2167 } 2168 } 2169 2170 const bool Matcher::vector_needs_partial_operations(Node* node, const TypeVect* vt) { 2171 return false; 2172 } 2173 2174 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2175 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2176 bool legacy = (generic_opnd->opcode() == LEGVEC); 2177 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2178 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2179 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2180 return new legVecZOper(); 2181 } 2182 if (legacy) { 2183 switch (ideal_reg) { 2184 case Op_VecS: return new legVecSOper(); 2185 case Op_VecD: return new legVecDOper(); 2186 case Op_VecX: return new legVecXOper(); 2187 case Op_VecY: return new legVecYOper(); 2188 case Op_VecZ: return new legVecZOper(); 2189 } 2190 } else { 2191 switch (ideal_reg) { 2192 case Op_VecS: return new vecSOper(); 2193 case Op_VecD: return new vecDOper(); 2194 case Op_VecX: return new vecXOper(); 2195 case Op_VecY: return new vecYOper(); 2196 case Op_VecZ: return new vecZOper(); 2197 } 2198 } 2199 ShouldNotReachHere(); 2200 return NULL; 2201 } 2202 2203 bool Matcher::is_reg2reg_move(MachNode* m) { 2204 switch (m->rule()) { 2205 case MoveVec2Leg_rule: 2206 case MoveLeg2Vec_rule: 2207 case MoveF2VL_rule: 2208 case MoveF2LEG_rule: 2209 case MoveVL2F_rule: 2210 case MoveLEG2F_rule: 2211 case MoveD2VL_rule: 2212 case MoveD2LEG_rule: 2213 case MoveVL2D_rule: 2214 case MoveLEG2D_rule: 2215 return true; 2216 default: 2217 return false; 2218 } 2219 } 2220 2221 bool Matcher::is_generic_vector(MachOper* opnd) { 2222 switch (opnd->opcode()) { 2223 case VEC: 2224 case LEGVEC: 2225 return true; 2226 default: 2227 return false; 2228 } 2229 } 2230 2231 //------------------------------------------------------------------------ 2232 2233 const RegMask* Matcher::predicate_reg_mask(void) { 2234 return &_VECTMASK_REG_mask; 2235 } 2236 2237 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2238 return new TypeVectMask(elemTy, length); 2239 } 2240 2241 // Max vector size in bytes. 0 if not supported. 2242 const int Matcher::vector_width_in_bytes(BasicType bt) { 2243 assert(is_java_primitive(bt), "only primitive type vectors"); 2244 if (UseSSE < 2) return 0; 2245 // SSE2 supports 128bit vectors for all types. 2246 // AVX2 supports 256bit vectors for all types. 2247 // AVX2/EVEX supports 512bit vectors for all types. 2248 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2249 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 2250 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2251 size = (UseAVX > 2) ? 64 : 32; 2252 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2253 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2254 // Use flag to limit vector size. 2255 size = MIN2(size,(int)MaxVectorSize); 2256 // Minimum 2 values in vector (or 4 for bytes). 
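// Worked example, for illustration: with UseAVX == 2 and MaxVectorSize == 32, T_LONG
// gets size == 32 bytes (4 lanes); with MaxVectorSize == 8 the same type falls below
// the 16-byte minimum enforced below and 0 is returned, i.e. no vectorization for it.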
2257 switch (bt) { 2258 case T_DOUBLE: 2259 case T_LONG: 2260 if (size < 16) return 0; 2261 break; 2262 case T_FLOAT: 2263 case T_INT: 2264 if (size < 8) return 0; 2265 break; 2266 case T_BOOLEAN: 2267 if (size < 4) return 0; 2268 break; 2269 case T_CHAR: 2270 if (size < 4) return 0; 2271 break; 2272 case T_BYTE: 2273 if (size < 4) return 0; 2274 break; 2275 case T_SHORT: 2276 if (size < 4) return 0; 2277 break; 2278 default: 2279 ShouldNotReachHere(); 2280 } 2281 return size; 2282 } 2283 2284 // Limits on vector size (number of elements) loaded into vector. 2285 const int Matcher::max_vector_size(const BasicType bt) { 2286 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2287 } 2288 const int Matcher::min_vector_size(const BasicType bt) { 2289 int max_size = max_vector_size(bt); 2290 // Min size which can be loaded into vector is 4 bytes. 2291 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2292 // Support for calling svml double64 vectors 2293 if (bt == T_DOUBLE) { 2294 size = 1; 2295 } 2296 return MIN2(size,max_size); 2297 } 2298 2299 const int Matcher::scalable_vector_reg_size(const BasicType bt) { 2300 return -1; 2301 } 2302 2303 // Vector ideal reg corresponding to specified size in bytes 2304 const uint Matcher::vector_ideal_reg(int size) { 2305 assert(MaxVectorSize >= size, ""); 2306 switch(size) { 2307 case 4: return Op_VecS; 2308 case 8: return Op_VecD; 2309 case 16: return Op_VecX; 2310 case 32: return Op_VecY; 2311 case 64: return Op_VecZ; 2312 } 2313 ShouldNotReachHere(); 2314 return 0; 2315 } 2316 2317 // Check for shift by small constant as well 2318 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2319 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2320 shift->in(2)->get_int() <= 3 && 2321 // Are there other uses besides address expressions? 2322 !matcher->is_visited(shift)) { 2323 address_visited.set(shift->_idx); // Flag as address_visited 2324 mstack.push(shift->in(2), Matcher::Visit); 2325 Node *conv = shift->in(1); 2326 #ifdef _LP64 2327 // Allow Matcher to match the rule which bypass 2328 // ConvI2L operation for an array index on LP64 2329 // if the index value is positive. 2330 if (conv->Opcode() == Op_ConvI2L && 2331 conv->as_Type()->type()->is_long()->_lo >= 0 && 2332 // Are there other uses besides address expressions? 2333 !matcher->is_visited(conv)) { 2334 address_visited.set(conv->_idx); // Flag as address_visited 2335 mstack.push(conv->in(1), Matcher::Pre_Visit); 2336 } else 2337 #endif 2338 mstack.push(conv, Matcher::Pre_Visit); 2339 return true; 2340 } 2341 return false; 2342 } 2343 2344 // This function identifies sub-graphs in which a 'load' node is 2345 // input to two different nodes, and such that it can be matched 2346 // with BMI instructions like blsi, blsr, etc. 2347 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2348 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2349 // refers to the same node. 2350 // 2351 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2352 // This is a temporary solution until we make DAGs expressible in ADL. 
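// Sketch of the intended use (cf. is_bmi_pattern() below): for an int graph
// (AndI (SubI 0 LoadI) LoadI), FusedPatternMatcher<TypeInt>(n, m, Op_ConI)
// .match(Op_AndI, -1, Op_SubI, 1, 0) returns true, provided the intermediate nodes
// have no other uses, which is what allows -a[i] & a[i] to match a single blsi r32, m32.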
2353 template<typename ConType> 2354 class FusedPatternMatcher { 2355 Node* _op1_node; 2356 Node* _mop_node; 2357 int _con_op; 2358 2359 static int match_next(Node* n, int next_op, int next_op_idx) { 2360 if (n->in(1) == NULL || n->in(2) == NULL) { 2361 return -1; 2362 } 2363 2364 if (next_op_idx == -1) { // n is commutative, try rotations 2365 if (n->in(1)->Opcode() == next_op) { 2366 return 1; 2367 } else if (n->in(2)->Opcode() == next_op) { 2368 return 2; 2369 } 2370 } else { 2371 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2372 if (n->in(next_op_idx)->Opcode() == next_op) { 2373 return next_op_idx; 2374 } 2375 } 2376 return -1; 2377 } 2378 2379 public: 2380 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2381 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2382 2383 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2384 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2385 typename ConType::NativeType con_value) { 2386 if (_op1_node->Opcode() != op1) { 2387 return false; 2388 } 2389 if (_mop_node->outcnt() > 2) { 2390 return false; 2391 } 2392 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2393 if (op1_op2_idx == -1) { 2394 return false; 2395 } 2396 // Memory operation must be the other edge 2397 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2398 2399 // Check that the mop node is really what we want 2400 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2401 Node* op2_node = _op1_node->in(op1_op2_idx); 2402 if (op2_node->outcnt() > 1) { 2403 return false; 2404 } 2405 assert(op2_node->Opcode() == op2, "Should be"); 2406 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2407 if (op2_con_idx == -1) { 2408 return false; 2409 } 2410 // Memory operation must be the other edge 2411 int op2_mop_idx = (op2_con_idx & 1) + 1; 2412 // Check that the memory operation is the same node 2413 if (op2_node->in(op2_mop_idx) == _mop_node) { 2414 // Now check the constant 2415 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2416 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2417 return true; 2418 } 2419 } 2420 } 2421 return false; 2422 } 2423 }; 2424 2425 static bool is_bmi_pattern(Node* n, Node* m) { 2426 assert(UseBMI1Instructions, "sanity"); 2427 if (n != NULL && m != NULL) { 2428 if (m->Opcode() == Op_LoadI) { 2429 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2430 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2431 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2432 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2433 } else if (m->Opcode() == Op_LoadL) { 2434 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2435 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2436 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2437 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2438 } 2439 } 2440 return false; 2441 } 2442 2443 // Should the matcher clone input 'm' of node 'n'? 2444 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2445 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
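// The idea, roughly: cloning the shared load gives each consumer in the fused pattern
// its own copy, so the matcher can subsume it as the memory operand of the BMI
// instruction instead of forcing the load into a register first.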
2446 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2447 mstack.push(m, Visit); 2448 return true; 2449 } 2450 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2451 mstack.push(m, Visit); // m = ShiftCntV 2452 return true; 2453 } 2454 return false; 2455 } 2456 2457 // Should the Matcher clone shifts on addressing modes, expecting them 2458 // to be subsumed into complex addressing expressions or compute them 2459 // into registers? 2460 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2461 Node *off = m->in(AddPNode::Offset); 2462 if (off->is_Con()) { 2463 address_visited.test_set(m->_idx); // Flag as address_visited 2464 Node *adr = m->in(AddPNode::Address); 2465 2466 // Intel can handle 2 adds in addressing mode 2467 // AtomicAdd is not an addressing expression. 2468 // Cheap to find it by looking for screwy base. 2469 if (adr->is_AddP() && 2470 !adr->in(AddPNode::Base)->is_top() && 2471 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2472 // Are there other uses besides address expressions? 2473 !is_visited(adr)) { 2474 address_visited.set(adr->_idx); // Flag as address_visited 2475 Node *shift = adr->in(AddPNode::Offset); 2476 if (!clone_shift(shift, this, mstack, address_visited)) { 2477 mstack.push(shift, Pre_Visit); 2478 } 2479 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2480 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2481 } else { 2482 mstack.push(adr, Pre_Visit); 2483 } 2484 2485 // Clone X+offset as it also folds into most addressing expressions 2486 mstack.push(off, Visit); 2487 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2488 return true; 2489 } else if (clone_shift(off, this, mstack, address_visited)) { 2490 address_visited.test_set(m->_idx); // Flag as address_visited 2491 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2492 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2493 return true; 2494 } 2495 return false; 2496 } 2497 2498 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2499 switch (bt) { 2500 case BoolTest::eq: 2501 return Assembler::eq; 2502 case BoolTest::ne: 2503 return Assembler::neq; 2504 case BoolTest::le: 2505 case BoolTest::ule: 2506 return Assembler::le; 2507 case BoolTest::ge: 2508 case BoolTest::uge: 2509 return Assembler::nlt; 2510 case BoolTest::lt: 2511 case BoolTest::ult: 2512 return Assembler::lt; 2513 case BoolTest::gt: 2514 case BoolTest::ugt: 2515 return Assembler::nle; 2516 default : ShouldNotReachHere(); return Assembler::_false; 2517 } 2518 } 2519 2520 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2521 switch (bt) { 2522 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2523 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2524 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2525 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2526 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2527 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2528 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2529 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2530 } 2531 } 2532 2533 // Helper methods for MachSpillCopyNode::implementation(). 
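// Overview, as a reading aid: vec_mov_helper emits register-to-register vector copies
// and vec_spill_helper emits vector loads/stores against a stack slot. On AVX-512
// targets without AVX512VL the 128-/256-bit cases fall back to EVEX-encoded
// vextractf/vinsertf forms, apparently because the plain (v)movdqu encodings cannot
// reach XMM16-XMM31.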
2534 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2535 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2536 assert(ireg == Op_VecS || // 32bit vector 2537 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2538 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2539 "no non-adjacent vector moves" ); 2540 if (cbuf) { 2541 C2_MacroAssembler _masm(cbuf); 2542 switch (ireg) { 2543 case Op_VecS: // copy whole register 2544 case Op_VecD: 2545 case Op_VecX: 2546 #ifndef _LP64 2547 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2548 #else 2549 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2550 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2551 } else { 2552 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2553 } 2554 #endif 2555 break; 2556 case Op_VecY: 2557 #ifndef _LP64 2558 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2559 #else 2560 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2561 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2562 } else { 2563 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2564 } 2565 #endif 2566 break; 2567 case Op_VecZ: 2568 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2569 break; 2570 default: 2571 ShouldNotReachHere(); 2572 } 2573 #ifndef PRODUCT 2574 } else { 2575 switch (ireg) { 2576 case Op_VecS: 2577 case Op_VecD: 2578 case Op_VecX: 2579 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2580 break; 2581 case Op_VecY: 2582 case Op_VecZ: 2583 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2584 break; 2585 default: 2586 ShouldNotReachHere(); 2587 } 2588 #endif 2589 } 2590 } 2591 2592 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2593 int stack_offset, int reg, uint ireg, outputStream* st) { 2594 if (cbuf) { 2595 C2_MacroAssembler _masm(cbuf); 2596 if (is_load) { 2597 switch (ireg) { 2598 case Op_VecS: 2599 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2600 break; 2601 case Op_VecD: 2602 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2603 break; 2604 case Op_VecX: 2605 #ifndef _LP64 2606 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2607 #else 2608 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2609 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2610 } else { 2611 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2612 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2613 } 2614 #endif 2615 break; 2616 case Op_VecY: 2617 #ifndef _LP64 2618 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2619 #else 2620 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2621 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2622 } else { 2623 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2624 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2625 } 2626 #endif 2627 break; 2628 case Op_VecZ: 2629 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2630 break; 2631 default: 2632 ShouldNotReachHere(); 2633 } 2634 } else { // store 2635 switch (ireg) { 2636 case Op_VecS: 2637 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2638 break; 2639 case Op_VecD: 2640 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2641 break; 2642 case Op_VecX: 2643 #ifndef _LP64 2644 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2645 #else 2646 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2647 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2648 } 2649 else { 2650 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2651 } 2652 #endif 2653 break; 2654 case Op_VecY: 2655 #ifndef _LP64 2656 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2657 #else 2658 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2659 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2660 } 2661 else { 2662 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2663 } 2664 #endif 2665 break; 2666 case Op_VecZ: 2667 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2668 break; 2669 default: 2670 ShouldNotReachHere(); 2671 } 2672 } 2673 #ifndef PRODUCT 2674 } else { 2675 if (is_load) { 2676 switch (ireg) { 2677 case Op_VecS: 2678 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2679 break; 2680 case Op_VecD: 2681 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2682 break; 2683 case Op_VecX: 2684 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2685 break; 2686 case Op_VecY: 2687 case Op_VecZ: 2688 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2689 break; 2690 default: 2691 ShouldNotReachHere(); 2692 } 2693 } else { // store 2694 switch (ireg) { 2695 case Op_VecS: 2696 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2697 break; 2698 case Op_VecD: 2699 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2700 break; 2701 case Op_VecX: 2702 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2703 break; 2704 case Op_VecY: 2705 case Op_VecZ: 2706 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2707 break; 2708 default: 2709 ShouldNotReachHere(); 2710 } 2711 } 2712 #endif 2713 } 2714 } 2715 2716 template <class T> 2717 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2718 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2719 jvalue ele; 2720 switch (bt) { 2721 case T_BYTE: ele.b = con; break; 2722 case T_SHORT: ele.s = con; break; 2723 case T_INT: ele.i = con; break; 2724 case T_LONG: ele.j = con; break; 2725 case T_FLOAT: ele.f = con; break; 2726 case T_DOUBLE: ele.d = con; break; 2727 default: ShouldNotReachHere(); 2728 } 2729 for (int i = 0; i < len; i++) { 2730 val->append(ele); 2731 } 2732 return val; 2733 } 2734 2735 static inline jlong high_bit_set(BasicType bt) { 2736 switch (bt) { 2737 case T_BYTE: 
return 0x8080808080808080; 2738 case T_SHORT: return 0x8000800080008000; 2739 case T_INT: return 0x8000000080000000; 2740 case T_LONG: return 0x8000000000000000; 2741 default: 2742 ShouldNotReachHere(); 2743 return 0; 2744 } 2745 } 2746 2747 #ifndef PRODUCT 2748 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2749 st->print("nop \t# %d bytes pad for loops and calls", _count); 2750 } 2751 #endif 2752 2753 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2754 C2_MacroAssembler _masm(&cbuf); 2755 __ nop(_count); 2756 } 2757 2758 uint MachNopNode::size(PhaseRegAlloc*) const { 2759 return _count; 2760 } 2761 2762 #ifndef PRODUCT 2763 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2764 st->print("# breakpoint"); 2765 } 2766 #endif 2767 2768 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2769 C2_MacroAssembler _masm(&cbuf); 2770 __ int3(); 2771 } 2772 2773 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2774 return MachNode::size(ra_); 2775 } 2776 2777 %} 2778 2779 encode %{ 2780 2781 enc_class call_epilog %{ 2782 C2_MacroAssembler _masm(&cbuf); 2783 if (VerifyStackAtCalls) { 2784 // Check that stack depth is unchanged: find majik cookie on stack 2785 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2786 Label L; 2787 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2788 __ jccb(Assembler::equal, L); 2789 // Die if stack mismatch 2790 __ int3(); 2791 __ bind(L); 2792 } 2793 if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) { 2794 C2_MacroAssembler _masm(&cbuf); 2795 if (!_method->signature()->returns_null_free_inline_type()) { 2796 // The last return value is not set by the callee but used to pass IsInit information to compiled code. 2797 // Search for the corresponding projection, get the register and emit code that initialized it. 2798 uint con = (tf()->range_cc()->cnt() - 1); 2799 for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) { 2800 ProjNode* proj = fast_out(i)->as_Proj(); 2801 if (proj->_con == con) { 2802 // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized) 2803 OptoReg::Name optoReg = ra_->get_reg_first(proj); 2804 VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP)); 2805 Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1; 2806 __ testq(rax, rax); 2807 __ set_byte_if_not_zero(toReg); 2808 __ movzbl(toReg, toReg); 2809 if (reg->is_stack()) { 2810 int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size; 2811 __ movq(Address(rsp, st_off), toReg); 2812 } 2813 break; 2814 } 2815 } 2816 } 2817 if (return_value_is_used()) { 2818 // An inline type is returned as fields in multiple registers. 2819 // Rax either contains an oop if the inline type is buffered or a pointer 2820 // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax 2821 // if the lowest bit is set to allow C2 to use the oop after null checking. 
2822 // rax &= (rax & 1) - 1 2823 __ movptr(rscratch1, rax); 2824 __ andptr(rscratch1, 0x1); 2825 __ subptr(rscratch1, 0x1); 2826 __ andptr(rax, rscratch1); 2827 } 2828 } 2829 %} 2830 2831 %} 2832 2833 // Operands for bound floating pointer register arguments 2834 operand rxmm0() %{ 2835 constraint(ALLOC_IN_RC(xmm0_reg)); 2836 match(VecX); 2837 format%{%} 2838 interface(REG_INTER); 2839 %} 2840 2841 //----------OPERANDS----------------------------------------------------------- 2842 // Operand definitions must precede instruction definitions for correct parsing 2843 // in the ADLC because operands constitute user defined types which are used in 2844 // instruction definitions. 2845 2846 // Vectors 2847 2848 // Dummy generic vector class. Should be used for all vector operands. 2849 // Replaced with vec[SDXYZ] during post-selection pass. 2850 operand vec() %{ 2851 constraint(ALLOC_IN_RC(dynamic)); 2852 match(VecX); 2853 match(VecY); 2854 match(VecZ); 2855 match(VecS); 2856 match(VecD); 2857 2858 format %{ %} 2859 interface(REG_INTER); 2860 %} 2861 2862 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2863 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2864 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2865 // runtime code generation via reg_class_dynamic. 2866 operand legVec() %{ 2867 constraint(ALLOC_IN_RC(dynamic)); 2868 match(VecX); 2869 match(VecY); 2870 match(VecZ); 2871 match(VecS); 2872 match(VecD); 2873 2874 format %{ %} 2875 interface(REG_INTER); 2876 %} 2877 2878 // Replaces vec during post-selection cleanup. See above. 2879 operand vecS() %{ 2880 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2881 match(VecS); 2882 2883 format %{ %} 2884 interface(REG_INTER); 2885 %} 2886 2887 // Replaces legVec during post-selection cleanup. See above. 2888 operand legVecS() %{ 2889 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2890 match(VecS); 2891 2892 format %{ %} 2893 interface(REG_INTER); 2894 %} 2895 2896 // Replaces vec during post-selection cleanup. See above. 2897 operand vecD() %{ 2898 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2899 match(VecD); 2900 2901 format %{ %} 2902 interface(REG_INTER); 2903 %} 2904 2905 // Replaces legVec during post-selection cleanup. See above. 2906 operand legVecD() %{ 2907 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2908 match(VecD); 2909 2910 format %{ %} 2911 interface(REG_INTER); 2912 %} 2913 2914 // Replaces vec during post-selection cleanup. See above. 2915 operand vecX() %{ 2916 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2917 match(VecX); 2918 2919 format %{ %} 2920 interface(REG_INTER); 2921 %} 2922 2923 // Replaces legVec during post-selection cleanup. See above. 2924 operand legVecX() %{ 2925 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2926 match(VecX); 2927 2928 format %{ %} 2929 interface(REG_INTER); 2930 %} 2931 2932 // Replaces vec during post-selection cleanup. See above. 2933 operand vecY() %{ 2934 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2935 match(VecY); 2936 2937 format %{ %} 2938 interface(REG_INTER); 2939 %} 2940 2941 // Replaces legVec during post-selection cleanup. See above. 2942 operand legVecY() %{ 2943 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2944 match(VecY); 2945 2946 format %{ %} 2947 interface(REG_INTER); 2948 %} 2949 2950 // Replaces vec during post-selection cleanup. See above. 
2951 operand vecZ() %{ 2952 constraint(ALLOC_IN_RC(vectorz_reg)); 2953 match(VecZ); 2954 2955 format %{ %} 2956 interface(REG_INTER); 2957 %} 2958 2959 // Replaces legVec during post-selection cleanup. See above. 2960 operand legVecZ() %{ 2961 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2962 match(VecZ); 2963 2964 format %{ %} 2965 interface(REG_INTER); 2966 %} 2967 2968 // Comparison Code for FP conditional move 2969 operand cmpOp_vcmppd() %{ 2970 match(Bool); 2971 2972 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2973 n->as_Bool()->_test._test != BoolTest::no_overflow); 2974 format %{ "" %} 2975 interface(COND_INTER) %{ 2976 equal (0x0, "eq"); 2977 less (0x1, "lt"); 2978 less_equal (0x2, "le"); 2979 not_equal (0xC, "ne"); 2980 greater_equal(0xD, "ge"); 2981 greater (0xE, "gt"); 2982 //TODO cannot compile (adlc breaks) without two next lines with error: 2983 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2984 // equal' for overflow. 2985 overflow (0x20, "o"); // not really supported by the instruction 2986 no_overflow (0x21, "no"); // not really supported by the instruction 2987 %} 2988 %} 2989 2990 2991 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2992 2993 // ============================================================================ 2994 2995 instruct ShouldNotReachHere() %{ 2996 match(Halt); 2997 format %{ "stop\t# ShouldNotReachHere" %} 2998 ins_encode %{ 2999 if (is_reachable()) { 3000 __ stop(_halt_reason); 3001 } 3002 %} 3003 ins_pipe(pipe_slow); 3004 %} 3005 3006 // ============================================================================ 3007 3008 instruct addF_reg(regF dst, regF src) %{ 3009 predicate((UseSSE>=1) && (UseAVX == 0)); 3010 match(Set dst (AddF dst src)); 3011 3012 format %{ "addss $dst, $src" %} 3013 ins_cost(150); 3014 ins_encode %{ 3015 __ addss($dst$$XMMRegister, $src$$XMMRegister); 3016 %} 3017 ins_pipe(pipe_slow); 3018 %} 3019 3020 instruct addF_mem(regF dst, memory src) %{ 3021 predicate((UseSSE>=1) && (UseAVX == 0)); 3022 match(Set dst (AddF dst (LoadF src))); 3023 3024 format %{ "addss $dst, $src" %} 3025 ins_cost(150); 3026 ins_encode %{ 3027 __ addss($dst$$XMMRegister, $src$$Address); 3028 %} 3029 ins_pipe(pipe_slow); 3030 %} 3031 3032 instruct addF_imm(regF dst, immF con) %{ 3033 predicate((UseSSE>=1) && (UseAVX == 0)); 3034 match(Set dst (AddF dst con)); 3035 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3036 ins_cost(150); 3037 ins_encode %{ 3038 __ addss($dst$$XMMRegister, $constantaddress($con)); 3039 %} 3040 ins_pipe(pipe_slow); 3041 %} 3042 3043 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 3044 predicate(UseAVX > 0); 3045 match(Set dst (AddF src1 src2)); 3046 3047 format %{ "vaddss $dst, $src1, $src2" %} 3048 ins_cost(150); 3049 ins_encode %{ 3050 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3051 %} 3052 ins_pipe(pipe_slow); 3053 %} 3054 3055 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 3056 predicate(UseAVX > 0); 3057 match(Set dst (AddF src1 (LoadF src2))); 3058 3059 format %{ "vaddss $dst, $src1, $src2" %} 3060 ins_cost(150); 3061 ins_encode %{ 3062 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3063 %} 3064 ins_pipe(pipe_slow); 3065 %} 3066 3067 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 3068 predicate(UseAVX > 0); 3069 match(Set dst (AddF src con)); 3070 3071 format %{ "vaddss $dst, $src, [$constantaddress]\t# load 
from constant table: float=$con" %} 3072 ins_cost(150); 3073 ins_encode %{ 3074 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3075 %} 3076 ins_pipe(pipe_slow); 3077 %} 3078 3079 instruct addD_reg(regD dst, regD src) %{ 3080 predicate((UseSSE>=2) && (UseAVX == 0)); 3081 match(Set dst (AddD dst src)); 3082 3083 format %{ "addsd $dst, $src" %} 3084 ins_cost(150); 3085 ins_encode %{ 3086 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 3087 %} 3088 ins_pipe(pipe_slow); 3089 %} 3090 3091 instruct addD_mem(regD dst, memory src) %{ 3092 predicate((UseSSE>=2) && (UseAVX == 0)); 3093 match(Set dst (AddD dst (LoadD src))); 3094 3095 format %{ "addsd $dst, $src" %} 3096 ins_cost(150); 3097 ins_encode %{ 3098 __ addsd($dst$$XMMRegister, $src$$Address); 3099 %} 3100 ins_pipe(pipe_slow); 3101 %} 3102 3103 instruct addD_imm(regD dst, immD con) %{ 3104 predicate((UseSSE>=2) && (UseAVX == 0)); 3105 match(Set dst (AddD dst con)); 3106 format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3107 ins_cost(150); 3108 ins_encode %{ 3109 __ addsd($dst$$XMMRegister, $constantaddress($con)); 3110 %} 3111 ins_pipe(pipe_slow); 3112 %} 3113 3114 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 3115 predicate(UseAVX > 0); 3116 match(Set dst (AddD src1 src2)); 3117 3118 format %{ "vaddsd $dst, $src1, $src2" %} 3119 ins_cost(150); 3120 ins_encode %{ 3121 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3122 %} 3123 ins_pipe(pipe_slow); 3124 %} 3125 3126 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 3127 predicate(UseAVX > 0); 3128 match(Set dst (AddD src1 (LoadD src2))); 3129 3130 format %{ "vaddsd $dst, $src1, $src2" %} 3131 ins_cost(150); 3132 ins_encode %{ 3133 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3134 %} 3135 ins_pipe(pipe_slow); 3136 %} 3137 3138 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 3139 predicate(UseAVX > 0); 3140 match(Set dst (AddD src con)); 3141 3142 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3143 ins_cost(150); 3144 ins_encode %{ 3145 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3146 %} 3147 ins_pipe(pipe_slow); 3148 %} 3149 3150 instruct subF_reg(regF dst, regF src) %{ 3151 predicate((UseSSE>=1) && (UseAVX == 0)); 3152 match(Set dst (SubF dst src)); 3153 3154 format %{ "subss $dst, $src" %} 3155 ins_cost(150); 3156 ins_encode %{ 3157 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3158 %} 3159 ins_pipe(pipe_slow); 3160 %} 3161 3162 instruct subF_mem(regF dst, memory src) %{ 3163 predicate((UseSSE>=1) && (UseAVX == 0)); 3164 match(Set dst (SubF dst (LoadF src))); 3165 3166 format %{ "subss $dst, $src" %} 3167 ins_cost(150); 3168 ins_encode %{ 3169 __ subss($dst$$XMMRegister, $src$$Address); 3170 %} 3171 ins_pipe(pipe_slow); 3172 %} 3173 3174 instruct subF_imm(regF dst, immF con) %{ 3175 predicate((UseSSE>=1) && (UseAVX == 0)); 3176 match(Set dst (SubF dst con)); 3177 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3178 ins_cost(150); 3179 ins_encode %{ 3180 __ subss($dst$$XMMRegister, $constantaddress($con)); 3181 %} 3182 ins_pipe(pipe_slow); 3183 %} 3184 3185 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3186 predicate(UseAVX > 0); 3187 match(Set dst (SubF src1 src2)); 3188 3189 format %{ "vsubss $dst, $src1, $src2" %} 3190 ins_cost(150); 3191 ins_encode %{ 3192 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3193 %} 
3194 ins_pipe(pipe_slow); 3195 %} 3196 3197 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3198 predicate(UseAVX > 0); 3199 match(Set dst (SubF src1 (LoadF src2))); 3200 3201 format %{ "vsubss $dst, $src1, $src2" %} 3202 ins_cost(150); 3203 ins_encode %{ 3204 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3205 %} 3206 ins_pipe(pipe_slow); 3207 %} 3208 3209 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3210 predicate(UseAVX > 0); 3211 match(Set dst (SubF src con)); 3212 3213 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3214 ins_cost(150); 3215 ins_encode %{ 3216 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3217 %} 3218 ins_pipe(pipe_slow); 3219 %} 3220 3221 instruct subD_reg(regD dst, regD src) %{ 3222 predicate((UseSSE>=2) && (UseAVX == 0)); 3223 match(Set dst (SubD dst src)); 3224 3225 format %{ "subsd $dst, $src" %} 3226 ins_cost(150); 3227 ins_encode %{ 3228 __ subsd($dst$$XMMRegister, $src$$XMMRegister); 3229 %} 3230 ins_pipe(pipe_slow); 3231 %} 3232 3233 instruct subD_mem(regD dst, memory src) %{ 3234 predicate((UseSSE>=2) && (UseAVX == 0)); 3235 match(Set dst (SubD dst (LoadD src))); 3236 3237 format %{ "subsd $dst, $src" %} 3238 ins_cost(150); 3239 ins_encode %{ 3240 __ subsd($dst$$XMMRegister, $src$$Address); 3241 %} 3242 ins_pipe(pipe_slow); 3243 %} 3244 3245 instruct subD_imm(regD dst, immD con) %{ 3246 predicate((UseSSE>=2) && (UseAVX == 0)); 3247 match(Set dst (SubD dst con)); 3248 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3249 ins_cost(150); 3250 ins_encode %{ 3251 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3252 %} 3253 ins_pipe(pipe_slow); 3254 %} 3255 3256 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3257 predicate(UseAVX > 0); 3258 match(Set dst (SubD src1 src2)); 3259 3260 format %{ "vsubsd $dst, $src1, $src2" %} 3261 ins_cost(150); 3262 ins_encode %{ 3263 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3264 %} 3265 ins_pipe(pipe_slow); 3266 %} 3267 3268 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3269 predicate(UseAVX > 0); 3270 match(Set dst (SubD src1 (LoadD src2))); 3271 3272 format %{ "vsubsd $dst, $src1, $src2" %} 3273 ins_cost(150); 3274 ins_encode %{ 3275 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3276 %} 3277 ins_pipe(pipe_slow); 3278 %} 3279 3280 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3281 predicate(UseAVX > 0); 3282 match(Set dst (SubD src con)); 3283 3284 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3285 ins_cost(150); 3286 ins_encode %{ 3287 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3288 %} 3289 ins_pipe(pipe_slow); 3290 %} 3291 3292 instruct mulF_reg(regF dst, regF src) %{ 3293 predicate((UseSSE>=1) && (UseAVX == 0)); 3294 match(Set dst (MulF dst src)); 3295 3296 format %{ "mulss $dst, $src" %} 3297 ins_cost(150); 3298 ins_encode %{ 3299 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3300 %} 3301 ins_pipe(pipe_slow); 3302 %} 3303 3304 instruct mulF_mem(regF dst, memory src) %{ 3305 predicate((UseSSE>=1) && (UseAVX == 0)); 3306 match(Set dst (MulF dst (LoadF src))); 3307 3308 format %{ "mulss $dst, $src" %} 3309 ins_cost(150); 3310 ins_encode %{ 3311 __ mulss($dst$$XMMRegister, $src$$Address); 3312 %} 3313 ins_pipe(pipe_slow); 3314 %} 3315 3316 instruct mulF_imm(regF dst, immF con) %{ 3317 predicate((UseSSE>=1) && (UseAVX == 
0)); 3318 match(Set dst (MulF dst con)); 3319 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3320 ins_cost(150); 3321 ins_encode %{ 3322 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3323 %} 3324 ins_pipe(pipe_slow); 3325 %} 3326 3327 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3328 predicate(UseAVX > 0); 3329 match(Set dst (MulF src1 src2)); 3330 3331 format %{ "vmulss $dst, $src1, $src2" %} 3332 ins_cost(150); 3333 ins_encode %{ 3334 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3335 %} 3336 ins_pipe(pipe_slow); 3337 %} 3338 3339 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3340 predicate(UseAVX > 0); 3341 match(Set dst (MulF src1 (LoadF src2))); 3342 3343 format %{ "vmulss $dst, $src1, $src2" %} 3344 ins_cost(150); 3345 ins_encode %{ 3346 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3347 %} 3348 ins_pipe(pipe_slow); 3349 %} 3350 3351 instruct mulF_reg_imm(regF dst, regF src, immF con) %{ 3352 predicate(UseAVX > 0); 3353 match(Set dst (MulF src con)); 3354 3355 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3356 ins_cost(150); 3357 ins_encode %{ 3358 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3359 %} 3360 ins_pipe(pipe_slow); 3361 %} 3362 3363 instruct mulD_reg(regD dst, regD src) %{ 3364 predicate((UseSSE>=2) && (UseAVX == 0)); 3365 match(Set dst (MulD dst src)); 3366 3367 format %{ "mulsd $dst, $src" %} 3368 ins_cost(150); 3369 ins_encode %{ 3370 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3371 %} 3372 ins_pipe(pipe_slow); 3373 %} 3374 3375 instruct mulD_mem(regD dst, memory src) %{ 3376 predicate((UseSSE>=2) && (UseAVX == 0)); 3377 match(Set dst (MulD dst (LoadD src))); 3378 3379 format %{ "mulsd $dst, $src" %} 3380 ins_cost(150); 3381 ins_encode %{ 3382 __ mulsd($dst$$XMMRegister, $src$$Address); 3383 %} 3384 ins_pipe(pipe_slow); 3385 %} 3386 3387 instruct mulD_imm(regD dst, immD con) %{ 3388 predicate((UseSSE>=2) && (UseAVX == 0)); 3389 match(Set dst (MulD dst con)); 3390 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3391 ins_cost(150); 3392 ins_encode %{ 3393 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3394 %} 3395 ins_pipe(pipe_slow); 3396 %} 3397 3398 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3399 predicate(UseAVX > 0); 3400 match(Set dst (MulD src1 src2)); 3401 3402 format %{ "vmulsd $dst, $src1, $src2" %} 3403 ins_cost(150); 3404 ins_encode %{ 3405 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3406 %} 3407 ins_pipe(pipe_slow); 3408 %} 3409 3410 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3411 predicate(UseAVX > 0); 3412 match(Set dst (MulD src1 (LoadD src2))); 3413 3414 format %{ "vmulsd $dst, $src1, $src2" %} 3415 ins_cost(150); 3416 ins_encode %{ 3417 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3418 %} 3419 ins_pipe(pipe_slow); 3420 %} 3421 3422 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3423 predicate(UseAVX > 0); 3424 match(Set dst (MulD src con)); 3425 3426 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3427 ins_cost(150); 3428 ins_encode %{ 3429 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3430 %} 3431 ins_pipe(pipe_slow); 3432 %} 3433 3434 instruct divF_reg(regF dst, regF src) %{ 3435 predicate((UseSSE>=1) && (UseAVX == 0)); 3436 match(Set dst (DivF dst src)); 3437 
3438 format %{ "divss $dst, $src" %} 3439 ins_cost(150); 3440 ins_encode %{ 3441 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3442 %} 3443 ins_pipe(pipe_slow); 3444 %} 3445 3446 instruct divF_mem(regF dst, memory src) %{ 3447 predicate((UseSSE>=1) && (UseAVX == 0)); 3448 match(Set dst (DivF dst (LoadF src))); 3449 3450 format %{ "divss $dst, $src" %} 3451 ins_cost(150); 3452 ins_encode %{ 3453 __ divss($dst$$XMMRegister, $src$$Address); 3454 %} 3455 ins_pipe(pipe_slow); 3456 %} 3457 3458 instruct divF_imm(regF dst, immF con) %{ 3459 predicate((UseSSE>=1) && (UseAVX == 0)); 3460 match(Set dst (DivF dst con)); 3461 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3462 ins_cost(150); 3463 ins_encode %{ 3464 __ divss($dst$$XMMRegister, $constantaddress($con)); 3465 %} 3466 ins_pipe(pipe_slow); 3467 %} 3468 3469 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3470 predicate(UseAVX > 0); 3471 match(Set dst (DivF src1 src2)); 3472 3473 format %{ "vdivss $dst, $src1, $src2" %} 3474 ins_cost(150); 3475 ins_encode %{ 3476 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3477 %} 3478 ins_pipe(pipe_slow); 3479 %} 3480 3481 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3482 predicate(UseAVX > 0); 3483 match(Set dst (DivF src1 (LoadF src2))); 3484 3485 format %{ "vdivss $dst, $src1, $src2" %} 3486 ins_cost(150); 3487 ins_encode %{ 3488 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3489 %} 3490 ins_pipe(pipe_slow); 3491 %} 3492 3493 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3494 predicate(UseAVX > 0); 3495 match(Set dst (DivF src con)); 3496 3497 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3498 ins_cost(150); 3499 ins_encode %{ 3500 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3501 %} 3502 ins_pipe(pipe_slow); 3503 %} 3504 3505 instruct divD_reg(regD dst, regD src) %{ 3506 predicate((UseSSE>=2) && (UseAVX == 0)); 3507 match(Set dst (DivD dst src)); 3508 3509 format %{ "divsd $dst, $src" %} 3510 ins_cost(150); 3511 ins_encode %{ 3512 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3513 %} 3514 ins_pipe(pipe_slow); 3515 %} 3516 3517 instruct divD_mem(regD dst, memory src) %{ 3518 predicate((UseSSE>=2) && (UseAVX == 0)); 3519 match(Set dst (DivD dst (LoadD src))); 3520 3521 format %{ "divsd $dst, $src" %} 3522 ins_cost(150); 3523 ins_encode %{ 3524 __ divsd($dst$$XMMRegister, $src$$Address); 3525 %} 3526 ins_pipe(pipe_slow); 3527 %} 3528 3529 instruct divD_imm(regD dst, immD con) %{ 3530 predicate((UseSSE>=2) && (UseAVX == 0)); 3531 match(Set dst (DivD dst con)); 3532 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3533 ins_cost(150); 3534 ins_encode %{ 3535 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3536 %} 3537 ins_pipe(pipe_slow); 3538 %} 3539 3540 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3541 predicate(UseAVX > 0); 3542 match(Set dst (DivD src1 src2)); 3543 3544 format %{ "vdivsd $dst, $src1, $src2" %} 3545 ins_cost(150); 3546 ins_encode %{ 3547 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3548 %} 3549 ins_pipe(pipe_slow); 3550 %} 3551 3552 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3553 predicate(UseAVX > 0); 3554 match(Set dst (DivD src1 (LoadD src2))); 3555 3556 format %{ "vdivsd $dst, $src1, $src2" %} 3557 ins_cost(150); 3558 ins_encode %{ 3559 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, 
$src2$$Address); 3560 %} 3561 ins_pipe(pipe_slow); 3562 %} 3563 3564 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3565 predicate(UseAVX > 0); 3566 match(Set dst (DivD src con)); 3567 3568 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3569 ins_cost(150); 3570 ins_encode %{ 3571 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3572 %} 3573 ins_pipe(pipe_slow); 3574 %} 3575 3576 instruct absF_reg(regF dst) %{ 3577 predicate((UseSSE>=1) && (UseAVX == 0)); 3578 match(Set dst (AbsF dst)); 3579 ins_cost(150); 3580 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3581 ins_encode %{ 3582 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3583 %} 3584 ins_pipe(pipe_slow); 3585 %} 3586 3587 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3588 predicate(UseAVX > 0); 3589 match(Set dst (AbsF src)); 3590 ins_cost(150); 3591 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3592 ins_encode %{ 3593 int vlen_enc = Assembler::AVX_128bit; 3594 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3595 ExternalAddress(float_signmask()), vlen_enc); 3596 %} 3597 ins_pipe(pipe_slow); 3598 %} 3599 3600 instruct absD_reg(regD dst) %{ 3601 predicate((UseSSE>=2) && (UseAVX == 0)); 3602 match(Set dst (AbsD dst)); 3603 ins_cost(150); 3604 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3605 "# abs double by sign masking" %} 3606 ins_encode %{ 3607 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3608 %} 3609 ins_pipe(pipe_slow); 3610 %} 3611 3612 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3613 predicate(UseAVX > 0); 3614 match(Set dst (AbsD src)); 3615 ins_cost(150); 3616 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3617 "# abs double by sign masking" %} 3618 ins_encode %{ 3619 int vlen_enc = Assembler::AVX_128bit; 3620 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3621 ExternalAddress(double_signmask()), vlen_enc); 3622 %} 3623 ins_pipe(pipe_slow); 3624 %} 3625 3626 instruct negF_reg(regF dst) %{ 3627 predicate((UseSSE>=1) && (UseAVX == 0)); 3628 match(Set dst (NegF dst)); 3629 ins_cost(150); 3630 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3631 ins_encode %{ 3632 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3633 %} 3634 ins_pipe(pipe_slow); 3635 %} 3636 3637 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3638 predicate(UseAVX > 0); 3639 match(Set dst (NegF src)); 3640 ins_cost(150); 3641 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3642 ins_encode %{ 3643 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3644 ExternalAddress(float_signflip())); 3645 %} 3646 ins_pipe(pipe_slow); 3647 %} 3648 3649 instruct negD_reg(regD dst) %{ 3650 predicate((UseSSE>=2) && (UseAVX == 0)); 3651 match(Set dst (NegD dst)); 3652 ins_cost(150); 3653 format %{ "xorpd $dst, [0x8000000000000000]\t" 3654 "# neg double by sign flipping" %} 3655 ins_encode %{ 3656 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3657 %} 3658 ins_pipe(pipe_slow); 3659 %} 3660 3661 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3662 predicate(UseAVX > 0); 3663 match(Set dst (NegD src)); 3664 ins_cost(150); 3665 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3666 "# neg double by sign flipping" %} 3667 ins_encode %{ 3668 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3669 ExternalAddress(double_signflip())); 3670 %} 3671 ins_pipe(pipe_slow); 3672 %} 3673 3674 // sqrtss instruction needs 
// destination register to be pre-initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
instruct sqrtF_reg(regF dst) %{
  predicate(UseSSE>=1);
  match(Set dst (SqrtF dst));
  format %{ "sqrtss $dst, $dst" %}
  ins_encode %{
    __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

// sqrtsd instruction needs destination register to be pre-initialized for best performance
// Therefore only the instruct rule where the input is pre-loaded into dst register is defined below
instruct sqrtD_reg(regD dst) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD dst));
  format %{ "sqrtsd $dst, $dst" %}
  ins_encode %{
    __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct convF2HF_reg_reg(rRegI dst, regF src, regF tmp) %{
  effect(TEMP tmp);
  match(Set dst (ConvF2HF src));
  ins_cost(125);
  format %{ "vcvtps2ph $dst,$src \t using $tmp as TEMP" %}
  ins_encode %{
    __ vcvtps2ph($tmp$$XMMRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
    __ movdl($dst$$Register, $tmp$$XMMRegister);
    __ movswl($dst$$Register, $dst$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct convF2HF_mem_reg(memory mem, regF src, kReg ktmp, rRegI rtmp) %{
  predicate((UseAVX > 2) && VM_Version::supports_avx512vl());
  effect(TEMP ktmp, TEMP rtmp);
  match(Set mem (StoreC mem (ConvF2HF src)));
  format %{ "evcvtps2ph $mem,$src \t using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    __ movl($rtmp$$Register, 0x1);
    __ kmovwl($ktmp$$KRegister, $rtmp$$Register);
    __ evcvtps2ph($mem$$Address, $ktmp$$KRegister, $src$$XMMRegister, 0x04, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF(vec dst, vec src) %{
  match(Set dst (VectorCastF2HF src));
  format %{ "vector_conv_F2HF $dst $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($dst$$XMMRegister, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvF2HF_mem_reg(memory mem, vec src) %{
  match(Set mem (StoreVector mem (VectorCastF2HF src)));
  format %{ "vcvtps2ph $mem,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    __ vcvtps2ph($mem$$Address, $src$$XMMRegister, 0x04, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct convHF2F_reg_reg(regF dst, rRegI src) %{
  match(Set dst (ConvHF2F src));
  format %{ "vcvtph2ps $dst,$src" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ vcvtph2ps($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F_reg_mem(vec dst, memory mem) %{
  match(Set dst (VectorCastHF2F (LoadVector mem)));
  format %{ "vcvtph2ps $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vconvHF2F(vec dst, vec src) %{
  match(Set dst (VectorCastHF2F src));
  ins_cost(125);
  format %{ "vector_conv_HF2F $dst,$src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vcvtph2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------------- VectorReinterpret ------------------------------------
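// VectorReinterpret is a bit-preserving cast between vector values. The rules below fall
// into three groups: same-size reinterprets are no-ops (empty encoding), expanding
// reinterprets zero the upper bytes of the destination (via masking or a narrow move),
// and shrinking reinterprets copy only the low bytes. Opmask (kReg) reinterprets
// round-trip through a vector temporary using evpmovm2{w,d,q} / evpmovb2m.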
instruct reinterpret_mask(kReg dst) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
  match(Set dst (VectorReinterpret dst));
  ins_cost(125);
  format %{ "vector_reinterpret $dst\t!" %}
  ins_encode %{
    // empty
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
  ins_encode %{
    int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
    int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
    assert(src_sz == dst_sz, "src and dst size mismatch");
    int vlen_enc = vector_length_encoding(src_sz);
    __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
    __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
  predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
            n->bottom_type()->isa_vectmask() &&
            n->in(1)->bottom_type()->isa_vectmask() &&
            (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
            n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
  match(Set dst (VectorReinterpret src));
  effect(TEMP xtmp);
  format %{ "vector_mask_reinterpret_Q2B $dst $src\t!"
%} 3839 ins_encode %{ 3840 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3841 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3842 assert(src_sz == dst_sz , "src and dst size mismatch"); 3843 int vlen_enc = vector_length_encoding(src_sz); 3844 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3845 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3846 %} 3847 ins_pipe( pipe_slow ); 3848 %} 3849 3850 instruct reinterpret(vec dst) %{ 3851 predicate(!n->bottom_type()->isa_vectmask() && 3852 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3853 match(Set dst (VectorReinterpret dst)); 3854 ins_cost(125); 3855 format %{ "vector_reinterpret $dst\t!" %} 3856 ins_encode %{ 3857 // empty 3858 %} 3859 ins_pipe( pipe_slow ); 3860 %} 3861 3862 instruct reinterpret_expand(vec dst, vec src) %{ 3863 predicate(UseAVX == 0 && 3864 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3865 match(Set dst (VectorReinterpret src)); 3866 ins_cost(125); 3867 effect(TEMP dst); 3868 format %{ "vector_reinterpret_expand $dst,$src" %} 3869 ins_encode %{ 3870 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3871 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3872 3873 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3874 if (src_vlen_in_bytes == 4) { 3875 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), noreg); 3876 } else { 3877 assert(src_vlen_in_bytes == 8, ""); 3878 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), noreg); 3879 } 3880 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3881 %} 3882 ins_pipe( pipe_slow ); 3883 %} 3884 3885 instruct vreinterpret_expand4(legVec dst, vec src) %{ 3886 predicate(UseAVX > 0 && 3887 !n->bottom_type()->isa_vectmask() && 3888 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3889 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3890 match(Set dst (VectorReinterpret src)); 3891 ins_cost(125); 3892 format %{ "vector_reinterpret_expand $dst,$src" %} 3893 ins_encode %{ 3894 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, noreg); 3895 %} 3896 ins_pipe( pipe_slow ); 3897 %} 3898 3899 3900 instruct vreinterpret_expand(legVec dst, vec src) %{ 3901 predicate(UseAVX > 0 && 3902 !n->bottom_type()->isa_vectmask() && 3903 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3904 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3905 match(Set dst (VectorReinterpret src)); 3906 ins_cost(125); 3907 format %{ "vector_reinterpret_expand $dst,$src\t!" %} 3908 ins_encode %{ 3909 switch (Matcher::vector_length_in_bytes(this, $src)) { 3910 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3911 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3912 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3913 default: ShouldNotReachHere(); 3914 } 3915 %} 3916 ins_pipe( pipe_slow ); 3917 %} 3918 3919 instruct reinterpret_shrink(vec dst, legVec src) %{ 3920 predicate(!n->bottom_type()->isa_vectmask() && 3921 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3922 match(Set dst (VectorReinterpret src)); 3923 ins_cost(125); 3924 format %{ "vector_reinterpret_shrink $dst,$src\t!" 
%} 3925 ins_encode %{ 3926 switch (Matcher::vector_length_in_bytes(this)) { 3927 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3928 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3929 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3930 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3931 default: ShouldNotReachHere(); 3932 } 3933 %} 3934 ins_pipe( pipe_slow ); 3935 %} 3936 3937 // ---------------------------------------------------------------------------------------------------- 3938 3939 #ifdef _LP64 3940 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3941 match(Set dst (RoundDoubleMode src rmode)); 3942 format %{ "roundsd $dst,$src" %} 3943 ins_cost(150); 3944 ins_encode %{ 3945 assert(UseSSE >= 4, "required"); 3946 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3947 %} 3948 ins_pipe(pipe_slow); 3949 %} 3950 3951 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3952 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3953 format %{ "roundsd $dst,$src" %} 3954 ins_cost(150); 3955 ins_encode %{ 3956 assert(UseSSE >= 4, "required"); 3957 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3958 %} 3959 ins_pipe(pipe_slow); 3960 %} 3961 3962 instruct roundD_imm(legRegD dst, immD con, immU8 rmode) %{ 3963 match(Set dst (RoundDoubleMode con rmode)); 3964 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3965 ins_cost(150); 3966 ins_encode %{ 3967 assert(UseSSE >= 4, "required"); 3968 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, noreg); 3969 %} 3970 ins_pipe(pipe_slow); 3971 %} 3972 3973 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3974 predicate(Matcher::vector_length(n) < 8); 3975 match(Set dst (RoundDoubleModeV src rmode)); 3976 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3977 ins_encode %{ 3978 assert(UseAVX > 0, "required"); 3979 int vlen_enc = vector_length_encoding(this); 3980 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3981 %} 3982 ins_pipe( pipe_slow ); 3983 %} 3984 3985 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3986 predicate(Matcher::vector_length(n) == 8); 3987 match(Set dst (RoundDoubleModeV src rmode)); 3988 format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %} 3989 ins_encode %{ 3990 assert(UseAVX > 2, "required"); 3991 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3992 %} 3993 ins_pipe( pipe_slow ); 3994 %} 3995 3996 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3997 predicate(Matcher::vector_length(n) < 8); 3998 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3999 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 4000 ins_encode %{ 4001 assert(UseAVX > 0, "required"); 4002 int vlen_enc = vector_length_encoding(this); 4003 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 4004 %} 4005 ins_pipe( pipe_slow ); 4006 %} 4007 4008 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 4009 predicate(Matcher::vector_length(n) == 8); 4010 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 4011 format %{ "vrndscalepd $dst,$mem,$rmode\t! 
round packed8D" %} 4012 ins_encode %{ 4013 assert(UseAVX > 2, "required"); 4014 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 #endif // _LP64 4019 4020 instruct onspinwait() %{ 4021 match(OnSpinWait); 4022 ins_cost(200); 4023 4024 format %{ 4025 $$template 4026 $$emit$$"pause\t! membar_onspinwait" 4027 %} 4028 ins_encode %{ 4029 __ pause(); 4030 %} 4031 ins_pipe(pipe_slow); 4032 %} 4033 4034 // a * b + c 4035 instruct fmaD_reg(regD a, regD b, regD c) %{ 4036 predicate(UseFMA); 4037 match(Set c (FmaD c (Binary a b))); 4038 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 4039 ins_cost(150); 4040 ins_encode %{ 4041 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4042 %} 4043 ins_pipe( pipe_slow ); 4044 %} 4045 4046 // a * b + c 4047 instruct fmaF_reg(regF a, regF b, regF c) %{ 4048 predicate(UseFMA); 4049 match(Set c (FmaF c (Binary a b))); 4050 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 4051 ins_cost(150); 4052 ins_encode %{ 4053 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 4054 %} 4055 ins_pipe( pipe_slow ); 4056 %} 4057 4058 // ====================VECTOR INSTRUCTIONS===================================== 4059 4060 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 4061 instruct MoveVec2Leg(legVec dst, vec src) %{ 4062 match(Set dst src); 4063 format %{ "" %} 4064 ins_encode %{ 4065 ShouldNotReachHere(); 4066 %} 4067 ins_pipe( fpu_reg_reg ); 4068 %} 4069 4070 instruct MoveLeg2Vec(vec dst, legVec src) %{ 4071 match(Set dst src); 4072 format %{ "" %} 4073 ins_encode %{ 4074 ShouldNotReachHere(); 4075 %} 4076 ins_pipe( fpu_reg_reg ); 4077 %} 4078 4079 // ============================================================================ 4080 4081 // Load vectors generic operand pattern 4082 instruct loadV(vec dst, memory mem) %{ 4083 match(Set dst (LoadVector mem)); 4084 ins_cost(125); 4085 format %{ "load_vector $dst,$mem" %} 4086 ins_encode %{ 4087 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 4088 %} 4089 ins_pipe( pipe_slow ); 4090 %} 4091 4092 // Store vectors generic operand pattern. 4093 instruct storeV(memory mem, vec src) %{ 4094 match(Set mem (StoreVector mem src)); 4095 ins_cost(145); 4096 format %{ "store_vector $mem,$src\n\t" %} 4097 ins_encode %{ 4098 switch (Matcher::vector_length_in_bytes(this, $src)) { 4099 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 4100 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 4101 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 4102 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 4103 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 4104 default: ShouldNotReachHere(); 4105 } 4106 %} 4107 ins_pipe( pipe_slow ); 4108 %} 4109 4110 // ---------------------------------------- Gather ------------------------------------ 4111 4112 // Gather INT, LONG, FLOAT, DOUBLE 4113 4114 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 4115 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 4116 match(Set dst (LoadVectorGather mem idx)); 4117 effect(TEMP dst, TEMP tmp, TEMP mask); 4118 format %{ "load_vector_gather $dst, $mem, $idx\t! 
using $tmp and $mask as TEMP" %}
  ins_encode %{
    assert(UseAVX >= 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    if (vlen_enc == Assembler::AVX_128bit) {
      __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
    } else {
      __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), noreg);
    }
    __ lea($tmp$$Register, $mem$$Address);
    __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (LoadVectorGather mem idx));
  effect(TEMP dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");

    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);

    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
  format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
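// Both gather flavors above consume their mask as they execute: the AVX2 vgather clears
// the vector mask operand and the AVX-512 evgather clears opmask bits as elements are
// loaded. That is why an all-ones mask is materialized into a TEMP for the unmasked
// rules, and why the masked rules copy the incoming opmask into $ktmp first. The
// scatter rules below follow the same pattern.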
// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), noreg);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================REPLICATE=======================================

// Replicate byte scalar to be vector
instruct vReplB_reg(vec dst, rRegI src) %{
  predicate(UseAVX >= 2);
  match(Set dst (ReplicateB src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    int vlen_enc = vector_length_encoding(this);
    if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
      assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
      __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
    } else {
      __ movdl($dst$$XMMRegister, $src$$Register);
      __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_reg(vec dst, rRegI src) %{
  predicate(UseAVX < 2);
  match(Set dst (ReplicateB src));
  format %{ "replicateB $dst,$src" %}
  ins_encode %{
    uint vlen = Matcher::vector_length(this);
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
    if (vlen >= 16) {
      assert(vlen == 16, "");
      __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct ReplB_mem(vec dst, memory mem) %{
  predicate(UseAVX >= 2);
  match(Set dst (ReplicateB (LoadB mem)));
  format %{ "replicateB $dst,$mem" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ====================ReplicateS=======================================

instruct vReplS_reg(vec dst, rRegI src) %{
  predicate(UseAVX >= 2);
  match(Set
dst (ReplicateS src)); 4272 format %{ "replicateS $dst,$src" %} 4273 ins_encode %{ 4274 uint vlen = Matcher::vector_length(this); 4275 int vlen_enc = vector_length_encoding(this); 4276 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4277 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4278 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4279 } else { 4280 __ movdl($dst$$XMMRegister, $src$$Register); 4281 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4282 } 4283 %} 4284 ins_pipe( pipe_slow ); 4285 %} 4286 4287 instruct ReplS_reg(vec dst, rRegI src) %{ 4288 predicate(UseAVX < 2); 4289 match(Set dst (ReplicateS src)); 4290 format %{ "replicateS $dst,$src" %} 4291 ins_encode %{ 4292 uint vlen = Matcher::vector_length(this); 4293 int vlen_enc = vector_length_encoding(this); 4294 __ movdl($dst$$XMMRegister, $src$$Register); 4295 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4296 if (vlen >= 8) { 4297 assert(vlen == 8, ""); 4298 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4299 } 4300 %} 4301 ins_pipe( pipe_slow ); 4302 %} 4303 4304 instruct ReplS_mem(vec dst, memory mem) %{ 4305 predicate(UseAVX >= 2); 4306 match(Set dst (ReplicateS (LoadS mem))); 4307 format %{ "replicateS $dst,$mem" %} 4308 ins_encode %{ 4309 int vlen_enc = vector_length_encoding(this); 4310 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4311 %} 4312 ins_pipe( pipe_slow ); 4313 %} 4314 4315 // ====================ReplicateI======================================= 4316 4317 instruct ReplI_reg(vec dst, rRegI src) %{ 4318 match(Set dst (ReplicateI src)); 4319 format %{ "replicateI $dst,$src" %} 4320 ins_encode %{ 4321 uint vlen = Matcher::vector_length(this); 4322 int vlen_enc = vector_length_encoding(this); 4323 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4324 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4325 } else if (VM_Version::supports_avx2()) { 4326 __ movdl($dst$$XMMRegister, $src$$Register); 4327 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4328 } else { 4329 __ movdl($dst$$XMMRegister, $src$$Register); 4330 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4331 } 4332 %} 4333 ins_pipe( pipe_slow ); 4334 %} 4335 4336 instruct ReplI_mem(vec dst, memory mem) %{ 4337 match(Set dst (ReplicateI (LoadI mem))); 4338 format %{ "replicateI $dst,$mem" %} 4339 ins_encode %{ 4340 int vlen_enc = vector_length_encoding(this); 4341 if (VM_Version::supports_avx2()) { 4342 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4343 } else if (VM_Version::supports_avx()) { 4344 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4345 } else { 4346 __ movdl($dst$$XMMRegister, $mem$$Address); 4347 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4348 } 4349 %} 4350 ins_pipe( pipe_slow ); 4351 %} 4352 4353 instruct ReplI_imm(vec dst, immI con) %{ 4354 match(Set dst (ReplicateB con)); 4355 match(Set dst (ReplicateS con)); 4356 match(Set dst (ReplicateI con)); 4357 format %{ "replicateI $dst,$con" %} 4358 ins_encode %{ 4359 InternalAddress addr = $constantaddress(Matcher::vector_element_basic_type(this), 4360 vreplicate_imm(Matcher::vector_element_basic_type(this), $con$$constant, 4361 (VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 
4 : 8) : 8) / 4362 type2aelembytes(Matcher::vector_element_basic_type(this)))); 4363 BasicType bt = Matcher::vector_element_basic_type(this); 4364 int vlen = Matcher::vector_length_in_bytes(this); 4365 __ load_constant_vector(bt, $dst$$XMMRegister, addr, vlen); 4366 %} 4367 ins_pipe( pipe_slow ); 4368 %} 4369 4370 // Replicate scalar zero to be vector 4371 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4372 match(Set dst (ReplicateB zero)); 4373 match(Set dst (ReplicateS zero)); 4374 match(Set dst (ReplicateI zero)); 4375 format %{ "replicateI $dst,$zero" %} 4376 ins_encode %{ 4377 int vlen_enc = vector_length_encoding(this); 4378 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4379 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4380 } else { 4381 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4382 } 4383 %} 4384 ins_pipe( fpu_reg_reg ); 4385 %} 4386 4387 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4388 predicate(UseSSE >= 2); 4389 match(Set dst (ReplicateB con)); 4390 match(Set dst (ReplicateS con)); 4391 match(Set dst (ReplicateI con)); 4392 format %{ "vallones $dst" %} 4393 ins_encode %{ 4394 int vector_len = vector_length_encoding(this); 4395 __ vallones($dst$$XMMRegister, vector_len); 4396 %} 4397 ins_pipe( pipe_slow ); 4398 %} 4399 4400 // ====================ReplicateL======================================= 4401 4402 #ifdef _LP64 4403 // Replicate long (8 byte) scalar to be vector 4404 instruct ReplL_reg(vec dst, rRegL src) %{ 4405 match(Set dst (ReplicateL src)); 4406 format %{ "replicateL $dst,$src" %} 4407 ins_encode %{ 4408 int vlen = Matcher::vector_length(this); 4409 int vlen_enc = vector_length_encoding(this); 4410 if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4411 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4412 } else if (VM_Version::supports_avx2()) { 4413 __ movdq($dst$$XMMRegister, $src$$Register); 4414 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4415 } else { 4416 __ movdq($dst$$XMMRegister, $src$$Register); 4417 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4418 } 4419 %} 4420 ins_pipe( pipe_slow ); 4421 %} 4422 #else // _LP64 4423 // Replicate long (8 byte) scalar to be vector 4424 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4425 predicate(Matcher::vector_length(n) <= 4); 4426 match(Set dst (ReplicateL src)); 4427 effect(TEMP dst, USE src, TEMP tmp); 4428 format %{ "replicateL $dst,$src" %} 4429 ins_encode %{ 4430 uint vlen = Matcher::vector_length(this); 4431 if (vlen == 2) { 4432 __ movdl($dst$$XMMRegister, $src$$Register); 4433 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4434 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4435 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4436 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4437 int vlen_enc = Assembler::AVX_256bit; 4438 __ movdl($dst$$XMMRegister, $src$$Register); 4439 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4440 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4441 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4442 } else { 4443 __ movdl($dst$$XMMRegister, $src$$Register); 4444 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4445 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4446 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4447 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4448 } 4449 %} 4450 ins_pipe( pipe_slow ); 4451 %} 4452 4453 
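// On 32-bit a long occupies a register pair, so the 32-bit rules above and below first
// assemble the 64-bit lane from the low and high halves (movdl + movdl(HIGH_FROM_LOW) +
// punpckldq) and only then replicate it across the vector.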
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4454 predicate(Matcher::vector_length(n) == 8); 4455 match(Set dst (ReplicateL src)); 4456 effect(TEMP dst, USE src, TEMP tmp); 4457 format %{ "replicateL $dst,$src" %} 4458 ins_encode %{ 4459 if (VM_Version::supports_avx512vl()) { 4460 __ movdl($dst$$XMMRegister, $src$$Register); 4461 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4462 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4463 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4464 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4465 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4466 } else { 4467 int vlen_enc = Assembler::AVX_512bit; 4468 __ movdl($dst$$XMMRegister, $src$$Register); 4469 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4470 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4471 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4472 } 4473 %} 4474 ins_pipe( pipe_slow ); 4475 %} 4476 #endif // _LP64 4477 4478 instruct ReplL_mem(vec dst, memory mem) %{ 4479 match(Set dst (ReplicateL (LoadL mem))); 4480 format %{ "replicateL $dst,$mem" %} 4481 ins_encode %{ 4482 int vlen_enc = vector_length_encoding(this); 4483 if (VM_Version::supports_avx2()) { 4484 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4485 } else if (VM_Version::supports_sse3()) { 4486 __ movddup($dst$$XMMRegister, $mem$$Address); 4487 } else { 4488 __ movq($dst$$XMMRegister, $mem$$Address); 4489 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4490 } 4491 %} 4492 ins_pipe( pipe_slow ); 4493 %} 4494 4495 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4496 instruct ReplL_imm(vec dst, immL con) %{ 4497 match(Set dst (ReplicateL con)); 4498 format %{ "replicateL $dst,$con" %} 4499 ins_encode %{ 4500 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, 1)); 4501 int vlen = Matcher::vector_length_in_bytes(this); 4502 __ load_constant_vector(T_LONG, $dst$$XMMRegister, addr, vlen); 4503 %} 4504 ins_pipe( pipe_slow ); 4505 %} 4506 4507 instruct ReplL_zero(vec dst, immL0 zero) %{ 4508 match(Set dst (ReplicateL zero)); 4509 format %{ "replicateL $dst,$zero" %} 4510 ins_encode %{ 4511 int vlen_enc = vector_length_encoding(this); 4512 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vl()) { 4513 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4514 } else { 4515 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4516 } 4517 %} 4518 ins_pipe( fpu_reg_reg ); 4519 %} 4520 4521 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4522 predicate(UseSSE >= 2); 4523 match(Set dst (ReplicateL con)); 4524 format %{ "vallones $dst" %} 4525 ins_encode %{ 4526 int vector_len = vector_length_encoding(this); 4527 __ vallones($dst$$XMMRegister, vector_len); 4528 %} 4529 ins_pipe( pipe_slow ); 4530 %} 4531 4532 // ====================ReplicateF======================================= 4533 4534 instruct vReplF_reg(vec dst, vlRegF src) %{ 4535 predicate(UseAVX > 0); 4536 match(Set dst (ReplicateF src)); 4537 format %{ "replicateF $dst,$src" %} 4538 ins_encode %{ 4539 uint vlen = Matcher::vector_length(this); 4540 int vlen_enc = vector_length_encoding(this); 4541 if (vlen <= 4) { 4542 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4543 } else if (VM_Version::supports_avx2()) { 4544 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 
4545 } else { 4546 assert(vlen == 8, "sanity"); 4547 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 0x00, Assembler::AVX_128bit); 4548 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4549 } 4550 %} 4551 ins_pipe( pipe_slow ); 4552 %} 4553 4554 instruct ReplF_reg(vec dst, vlRegF src) %{ 4555 predicate(UseAVX == 0); 4556 match(Set dst (ReplicateF src)); 4557 format %{ "replicateF $dst,$src" %} 4558 ins_encode %{ 4559 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4560 %} 4561 ins_pipe( pipe_slow ); 4562 %} 4563 4564 instruct ReplF_mem(vec dst, memory mem) %{ 4565 predicate(UseAVX > 0); 4566 match(Set dst (ReplicateF (LoadF mem))); 4567 format %{ "replicateF $dst,$mem" %} 4568 ins_encode %{ 4569 int vlen_enc = vector_length_encoding(this); 4570 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4571 %} 4572 ins_pipe( pipe_slow ); 4573 %} 4574 4575 // Replicate float scalar immediate to be vector by loading from const table. 4576 instruct ReplF_imm(vec dst, immF con) %{ 4577 match(Set dst (ReplicateF con)); 4578 format %{ "replicateF $dst,$con" %} 4579 ins_encode %{ 4580 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, 4581 VM_Version::supports_sse3() ? (VM_Version::supports_avx() ? 1 : 2) : 2)); 4582 int vlen = Matcher::vector_length_in_bytes(this); 4583 __ load_constant_vector(T_FLOAT, $dst$$XMMRegister, addr, vlen); 4584 %} 4585 ins_pipe( pipe_slow ); 4586 %} 4587 4588 instruct ReplF_zero(vec dst, immF0 zero) %{ 4589 match(Set dst (ReplicateF zero)); 4590 format %{ "replicateF $dst,$zero" %} 4591 ins_encode %{ 4592 int vlen_enc = vector_length_encoding(this); 4593 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4594 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4595 } else { 4596 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4597 } 4598 %} 4599 ins_pipe( fpu_reg_reg ); 4600 %} 4601 4602 // ====================ReplicateD======================================= 4603 4604 // Replicate double (8 bytes) scalar to be vector 4605 instruct vReplD_reg(vec dst, vlRegD src) %{ 4606 predicate(UseSSE >= 3); 4607 match(Set dst (ReplicateD src)); 4608 format %{ "replicateD $dst,$src" %} 4609 ins_encode %{ 4610 uint vlen = Matcher::vector_length(this); 4611 int vlen_enc = vector_length_encoding(this); 4612 if (vlen <= 2) { 4613 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4614 } else if (VM_Version::supports_avx2()) { 4615 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4616 } else { 4617 assert(vlen == 4, "sanity"); 4618 __ movddup($dst$$XMMRegister, $src$$XMMRegister); 4619 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4620 } 4621 %} 4622 ins_pipe( pipe_slow ); 4623 %} 4624 4625 instruct ReplD_reg(vec dst, vlRegD src) %{ 4626 predicate(UseSSE < 3); 4627 match(Set dst (ReplicateD src)); 4628 format %{ "replicateD $dst,$src" %} 4629 ins_encode %{ 4630 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4631 %} 4632 ins_pipe( pipe_slow ); 4633 %} 4634 4635 instruct ReplD_mem(vec dst, memory mem) %{ 4636 predicate(UseSSE >= 3); 4637 match(Set dst (ReplicateD (LoadD mem))); 4638 format %{ "replicateD $dst,$mem" %} 4639 ins_encode %{ 4640 if (Matcher::vector_length(this) >= 4) { 4641 int vlen_enc = vector_length_encoding(this); 4642 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4643 } else { 4644 __ movddup($dst$$XMMRegister, $mem$$Address); 4645 } 4646 %} 4647 ins_pipe( pipe_slow ); 4648 %} 
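// ReplD_imm below, like ReplI_imm/ReplL_imm/ReplF_imm above, materializes a replicated
// constant by emitting the pre-replicated bit pattern into the constant table
// (vreplicate_imm) and loading it into the destination with load_constant_vector.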
4649 4650 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 4651 instruct ReplD_imm(vec dst, immD con) %{ 4652 match(Set dst (ReplicateD con)); 4653 format %{ "replicateD $dst,$con" %} 4654 ins_encode %{ 4655 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, 1)); 4656 int vlen = Matcher::vector_length_in_bytes(this); 4657 __ load_constant_vector(T_DOUBLE, $dst$$XMMRegister, addr, vlen); 4658 %} 4659 ins_pipe( pipe_slow ); 4660 %} 4661 4662 instruct ReplD_zero(vec dst, immD0 zero) %{ 4663 match(Set dst (ReplicateD zero)); 4664 format %{ "replicateD $dst,$zero" %} 4665 ins_encode %{ 4666 int vlen_enc = vector_length_encoding(this); 4667 if (VM_Version::supports_evex() && !VM_Version::supports_avx512vldq()) { 4668 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4669 } else { 4670 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4671 } 4672 %} 4673 ins_pipe( fpu_reg_reg ); 4674 %} 4675 4676 // ====================VECTOR INSERT======================================= 4677 4678 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4679 predicate(Matcher::vector_length_in_bytes(n) < 32); 4680 match(Set dst (VectorInsert (Binary dst val) idx)); 4681 format %{ "vector_insert $dst,$val,$idx" %} 4682 ins_encode %{ 4683 assert(UseSSE >= 4, "required"); 4684 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4685 4686 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4687 4688 assert(is_integral_type(elem_bt), ""); 4689 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4690 4691 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4692 %} 4693 ins_pipe( pipe_slow ); 4694 %} 4695 4696 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4697 predicate(Matcher::vector_length_in_bytes(n) == 32); 4698 match(Set dst (VectorInsert (Binary src val) idx)); 4699 effect(TEMP vtmp); 4700 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4701 ins_encode %{ 4702 int vlen_enc = Assembler::AVX_256bit; 4703 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4704 int elem_per_lane = 16/type2aelembytes(elem_bt); 4705 int log2epr = log2(elem_per_lane); 4706 4707 assert(is_integral_type(elem_bt), "sanity"); 4708 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4709 4710 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4711 uint y_idx = ($idx$$constant >> log2epr) & 1; 4712 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4713 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4714 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4715 %} 4716 ins_pipe( pipe_slow ); 4717 %} 4718 4719 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4720 predicate(Matcher::vector_length_in_bytes(n) == 64); 4721 match(Set dst (VectorInsert (Binary src val) idx)); 4722 effect(TEMP vtmp); 4723 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4724 ins_encode %{ 4725 assert(UseAVX > 2, "sanity"); 4726 4727 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4728 int elem_per_lane = 16/type2aelembytes(elem_bt); 4729 int log2epr = log2(elem_per_lane); 4730 4731 assert(is_integral_type(elem_bt), ""); 4732 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4733 4734 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4735 uint y_idx = 
($idx$$constant >> log2epr) & 3; 4736 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4737 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4738 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4739 %} 4740 ins_pipe( pipe_slow ); 4741 %} 4742 4743 #ifdef _LP64 4744 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4745 predicate(Matcher::vector_length(n) == 2); 4746 match(Set dst (VectorInsert (Binary dst val) idx)); 4747 format %{ "vector_insert $dst,$val,$idx" %} 4748 ins_encode %{ 4749 assert(UseSSE >= 4, "required"); 4750 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4751 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4752 4753 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4754 %} 4755 ins_pipe( pipe_slow ); 4756 %} 4757 4758 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4759 predicate(Matcher::vector_length(n) == 4); 4760 match(Set dst (VectorInsert (Binary src val) idx)); 4761 effect(TEMP vtmp); 4762 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4763 ins_encode %{ 4764 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4765 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4766 4767 uint x_idx = $idx$$constant & right_n_bits(1); 4768 uint y_idx = ($idx$$constant >> 1) & 1; 4769 int vlen_enc = Assembler::AVX_256bit; 4770 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4771 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4772 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4773 %} 4774 ins_pipe( pipe_slow ); 4775 %} 4776 4777 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4778 predicate(Matcher::vector_length(n) == 8); 4779 match(Set dst (VectorInsert (Binary src val) idx)); 4780 effect(TEMP vtmp); 4781 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4782 ins_encode %{ 4783 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4784 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4785 4786 uint x_idx = $idx$$constant & right_n_bits(1); 4787 uint y_idx = ($idx$$constant >> 1) & 3; 4788 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4789 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4790 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4791 %} 4792 ins_pipe( pipe_slow ); 4793 %} 4794 #endif 4795 4796 instruct insertF(vec dst, regF val, immU8 idx) %{ 4797 predicate(Matcher::vector_length(n) < 8); 4798 match(Set dst (VectorInsert (Binary dst val) idx)); 4799 format %{ "vector_insert $dst,$val,$idx" %} 4800 ins_encode %{ 4801 assert(UseSSE >= 4, "sanity"); 4802 4803 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4804 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4805 4806 uint x_idx = $idx$$constant & right_n_bits(2); 4807 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4808 %} 4809 ins_pipe( pipe_slow ); 4810 %} 4811 4812 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4813 predicate(Matcher::vector_length(n) >= 8); 4814 match(Set dst (VectorInsert (Binary src val) idx)); 4815 effect(TEMP vtmp); 4816 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4817 ins_encode %{ 4818 
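    // Patch only the 128-bit lane that holds element $idx: extract that lane,
    // insertps the new float at the in-lane position (x_idx), then write the
    // lane back into $dst.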
assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4819 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4820 4821 int vlen = Matcher::vector_length(this); 4822 uint x_idx = $idx$$constant & right_n_bits(2); 4823 if (vlen == 8) { 4824 uint y_idx = ($idx$$constant >> 2) & 1; 4825 int vlen_enc = Assembler::AVX_256bit; 4826 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4827 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4828 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4829 } else { 4830 assert(vlen == 16, "sanity"); 4831 uint y_idx = ($idx$$constant >> 2) & 3; 4832 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4833 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4834 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4835 } 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 #ifdef _LP64 4841 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4842 predicate(Matcher::vector_length(n) == 2); 4843 match(Set dst (VectorInsert (Binary dst val) idx)); 4844 effect(TEMP tmp); 4845 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4846 ins_encode %{ 4847 assert(UseSSE >= 4, "sanity"); 4848 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4849 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4850 4851 __ movq($tmp$$Register, $val$$XMMRegister); 4852 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4853 %} 4854 ins_pipe( pipe_slow ); 4855 %} 4856 4857 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4858 predicate(Matcher::vector_length(n) == 4); 4859 match(Set dst (VectorInsert (Binary src val) idx)); 4860 effect(TEMP vtmp, TEMP tmp); 4861 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4862 ins_encode %{ 4863 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4864 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4865 4866 uint x_idx = $idx$$constant & right_n_bits(1); 4867 uint y_idx = ($idx$$constant >> 1) & 1; 4868 int vlen_enc = Assembler::AVX_256bit; 4869 __ movq($tmp$$Register, $val$$XMMRegister); 4870 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4871 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4872 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4873 %} 4874 ins_pipe( pipe_slow ); 4875 %} 4876 4877 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4878 predicate(Matcher::vector_length(n) == 8); 4879 match(Set dst (VectorInsert (Binary src val) idx)); 4880 effect(TEMP tmp, TEMP vtmp); 4881 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4882 ins_encode %{ 4883 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4884 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4885 4886 uint x_idx = $idx$$constant & right_n_bits(1); 4887 uint y_idx = ($idx$$constant >> 1) & 3; 4888 __ movq($tmp$$Register, $val$$XMMRegister); 4889 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4890 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4891 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4892 %} 4893 ins_pipe( pipe_slow ); 4894 %} 4895 
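// Note: the insert2D/insert4D/insert8D rules above move the double through a GPR
// (movq) so the integer pinsrq/vpinsrq forms can be reused; for 256/512-bit vectors
// the 128-bit lane containing the element is extracted (vextracti128/vextracti32x4),
// patched, and re-inserted (vinserti128/vinserti32x4).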
#endif 4896 4897 // ====================REDUCTION ARITHMETIC======================================= 4898 4899 // =======================Int Reduction========================================== 4900 4901 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4902 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 4903 match(Set dst (AddReductionVI src1 src2)); 4904 match(Set dst (MulReductionVI src1 src2)); 4905 match(Set dst (AndReductionV src1 src2)); 4906 match(Set dst ( OrReductionV src1 src2)); 4907 match(Set dst (XorReductionV src1 src2)); 4908 match(Set dst (MinReductionV src1 src2)); 4909 match(Set dst (MaxReductionV src1 src2)); 4910 effect(TEMP vtmp1, TEMP vtmp2); 4911 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4912 ins_encode %{ 4913 int opcode = this->ideal_Opcode(); 4914 int vlen = Matcher::vector_length(this, $src2); 4915 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4916 %} 4917 ins_pipe( pipe_slow ); 4918 %} 4919 4920 // =======================Long Reduction========================================== 4921 4922 #ifdef _LP64 4923 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4924 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4925 match(Set dst (AddReductionVL src1 src2)); 4926 match(Set dst (MulReductionVL src1 src2)); 4927 match(Set dst (AndReductionV src1 src2)); 4928 match(Set dst ( OrReductionV src1 src2)); 4929 match(Set dst (XorReductionV src1 src2)); 4930 match(Set dst (MinReductionV src1 src2)); 4931 match(Set dst (MaxReductionV src1 src2)); 4932 effect(TEMP vtmp1, TEMP vtmp2); 4933 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4934 ins_encode %{ 4935 int opcode = this->ideal_Opcode(); 4936 int vlen = Matcher::vector_length(this, $src2); 4937 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4938 %} 4939 ins_pipe( pipe_slow ); 4940 %} 4941 4942 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4943 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4944 match(Set dst (AddReductionVL src1 src2)); 4945 match(Set dst (MulReductionVL src1 src2)); 4946 match(Set dst (AndReductionV src1 src2)); 4947 match(Set dst ( OrReductionV src1 src2)); 4948 match(Set dst (XorReductionV src1 src2)); 4949 match(Set dst (MinReductionV src1 src2)); 4950 match(Set dst (MaxReductionV src1 src2)); 4951 effect(TEMP vtmp1, TEMP vtmp2); 4952 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4953 ins_encode %{ 4954 int opcode = this->ideal_Opcode(); 4955 int vlen = Matcher::vector_length(this, $src2); 4956 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4957 %} 4958 ins_pipe( pipe_slow ); 4959 %} 4960 #endif // _LP64 4961 4962 // =======================Float Reduction========================================== 4963 4964 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4965 predicate(Matcher::vector_length(n->in(2)) <= 4); // src 4966 match(Set dst (AddReductionVF dst src)); 4967 match(Set dst (MulReductionVF dst src)); 4968 effect(TEMP dst, TEMP vtmp); 4969 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4970 
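  // The first reduction input is the scalar accumulator: it arrives in $dst and the
  // reduced result is produced in place (note the dst-as-input operand in match()).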
ins_encode %{ 4971 int opcode = this->ideal_Opcode(); 4972 int vlen = Matcher::vector_length(this, $src); 4973 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4974 %} 4975 ins_pipe( pipe_slow ); 4976 %} 4977 4978 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4979 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4980 match(Set dst (AddReductionVF dst src)); 4981 match(Set dst (MulReductionVF dst src)); 4982 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4983 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4984 ins_encode %{ 4985 int opcode = this->ideal_Opcode(); 4986 int vlen = Matcher::vector_length(this, $src); 4987 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4988 %} 4989 ins_pipe( pipe_slow ); 4990 %} 4991 4992 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4993 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4994 match(Set dst (AddReductionVF dst src)); 4995 match(Set dst (MulReductionVF dst src)); 4996 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4997 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4998 ins_encode %{ 4999 int opcode = this->ideal_Opcode(); 5000 int vlen = Matcher::vector_length(this, $src); 5001 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5002 %} 5003 ins_pipe( pipe_slow ); 5004 %} 5005 5006 // =======================Double Reduction========================================== 5007 5008 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 5009 predicate(Matcher::vector_length(n->in(2)) == 2); // src 5010 match(Set dst (AddReductionVD dst src)); 5011 match(Set dst (MulReductionVD dst src)); 5012 effect(TEMP dst, TEMP vtmp); 5013 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 5014 ins_encode %{ 5015 int opcode = this->ideal_Opcode(); 5016 int vlen = Matcher::vector_length(this, $src); 5017 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 5018 %} 5019 ins_pipe( pipe_slow ); 5020 %} 5021 5022 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 5023 predicate(Matcher::vector_length(n->in(2)) == 4); // src 5024 match(Set dst (AddReductionVD dst src)); 5025 match(Set dst (MulReductionVD dst src)); 5026 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5027 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5028 ins_encode %{ 5029 int opcode = this->ideal_Opcode(); 5030 int vlen = Matcher::vector_length(this, $src); 5031 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5032 %} 5033 ins_pipe( pipe_slow ); 5034 %} 5035 5036 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 5037 predicate(Matcher::vector_length(n->in(2)) == 8); // src 5038 match(Set dst (AddReductionVD dst src)); 5039 match(Set dst (MulReductionVD dst src)); 5040 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5041 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 5042 ins_encode %{ 5043 int opcode = this->ideal_Opcode(); 5044 int vlen = Matcher::vector_length(this, $src); 5045 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5046 %} 5047 ins_pipe( pipe_slow ); 5048 %} 5049 5050 // =======================Byte Reduction========================================== 5051 5052 #ifdef 
_LP64 5053 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5054 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 5055 match(Set dst (AddReductionVI src1 src2)); 5056 match(Set dst (AndReductionV src1 src2)); 5057 match(Set dst ( OrReductionV src1 src2)); 5058 match(Set dst (XorReductionV src1 src2)); 5059 match(Set dst (MinReductionV src1 src2)); 5060 match(Set dst (MaxReductionV src1 src2)); 5061 effect(TEMP vtmp1, TEMP vtmp2); 5062 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5063 ins_encode %{ 5064 int opcode = this->ideal_Opcode(); 5065 int vlen = Matcher::vector_length(this, $src2); 5066 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5067 %} 5068 ins_pipe( pipe_slow ); 5069 %} 5070 5071 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5072 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 5073 match(Set dst (AddReductionVI src1 src2)); 5074 match(Set dst (AndReductionV src1 src2)); 5075 match(Set dst ( OrReductionV src1 src2)); 5076 match(Set dst (XorReductionV src1 src2)); 5077 match(Set dst (MinReductionV src1 src2)); 5078 match(Set dst (MaxReductionV src1 src2)); 5079 effect(TEMP vtmp1, TEMP vtmp2); 5080 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5081 ins_encode %{ 5082 int opcode = this->ideal_Opcode(); 5083 int vlen = Matcher::vector_length(this, $src2); 5084 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5085 %} 5086 ins_pipe( pipe_slow ); 5087 %} 5088 #endif 5089 5090 // =======================Short Reduction========================================== 5091 5092 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5093 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 5094 match(Set dst (AddReductionVI src1 src2)); 5095 match(Set dst (MulReductionVI src1 src2)); 5096 match(Set dst (AndReductionV src1 src2)); 5097 match(Set dst ( OrReductionV src1 src2)); 5098 match(Set dst (XorReductionV src1 src2)); 5099 match(Set dst (MinReductionV src1 src2)); 5100 match(Set dst (MaxReductionV src1 src2)); 5101 effect(TEMP vtmp1, TEMP vtmp2); 5102 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 5103 ins_encode %{ 5104 int opcode = this->ideal_Opcode(); 5105 int vlen = Matcher::vector_length(this, $src2); 5106 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5107 %} 5108 ins_pipe( pipe_slow ); 5109 %} 5110 5111 // =======================Mul Reduction========================================== 5112 5113 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 5114 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5115 Matcher::vector_length(n->in(2)) <= 32); // src2 5116 match(Set dst (MulReductionVI src1 src2)); 5117 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5118 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5119 ins_encode %{ 5120 int opcode = this->ideal_Opcode(); 5121 int vlen = Matcher::vector_length(this, $src2); 5122 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister); 5123 %} 5124 ins_pipe( pipe_slow ); 5125 %} 5126 5127 instruct mul_reduction64B(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 5128 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 5129 Matcher::vector_length(n->in(2)) == 64); // src2 5130 match(Set dst (MulReductionVI src1 src2)); 5131 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 5132 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 5133 ins_encode %{ 5134 int opcode = this->ideal_Opcode(); 5135 int vlen = Matcher::vector_length(this, $src2); 5136 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 5137 %} 5138 ins_pipe( pipe_slow ); 5139 %} 5140 5141 //--------------------Min/Max Float Reduction -------------------- 5142 // Float Min Reduction 5143 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 5144 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5145 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5146 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5147 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5148 Matcher::vector_length(n->in(2)) == 2); 5149 match(Set dst (MinReductionV src1 src2)); 5150 match(Set dst (MaxReductionV src1 src2)); 5151 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5152 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5153 ins_encode %{ 5154 assert(UseAVX > 0, "sanity"); 5155 5156 int opcode = this->ideal_Opcode(); 5157 int vlen = Matcher::vector_length(this, $src2); 5158 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5159 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5160 %} 5161 ins_pipe( pipe_slow ); 5162 %} 5163 5164 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 5165 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5166 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5167 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 5168 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 5169 Matcher::vector_length(n->in(2)) >= 4); 5170 match(Set dst (MinReductionV src1 src2)); 5171 match(Set dst (MaxReductionV src1 src2)); 5172 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5173 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5174 ins_encode %{ 5175 assert(UseAVX > 0, "sanity"); 5176 5177 int opcode = this->ideal_Opcode(); 5178 int vlen = Matcher::vector_length(this, $src2); 5179 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 5180 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5181 %} 5182 ins_pipe( pipe_slow ); 5183 %} 5184 5185 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 5186 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 5187 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5188 Matcher::vector_length(n->in(2)) == 2); 5189 match(Set dst (MinReductionV dst src)); 5190 match(Set dst (MaxReductionV dst src)); 5191 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 5192 format %{ 
"vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 5193 ins_encode %{ 5194 assert(UseAVX > 0, "sanity"); 5195 5196 int opcode = this->ideal_Opcode(); 5197 int vlen = Matcher::vector_length(this, $src); 5198 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5199 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 5200 %} 5201 ins_pipe( pipe_slow ); 5202 %} 5203 5204 5205 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 5206 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 5207 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 5208 Matcher::vector_length(n->in(2)) >= 4); 5209 match(Set dst (MinReductionV dst src)); 5210 match(Set dst (MaxReductionV dst src)); 5211 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 5212 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 5213 ins_encode %{ 5214 assert(UseAVX > 0, "sanity"); 5215 5216 int opcode = this->ideal_Opcode(); 5217 int vlen = Matcher::vector_length(this, $src); 5218 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 5219 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 5220 %} 5221 ins_pipe( pipe_slow ); 5222 %} 5223 5224 5225 //--------------------Min Double Reduction -------------------- 5226 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 5227 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5228 rFlagsReg cr) %{ 5229 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5230 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5231 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5232 Matcher::vector_length(n->in(2)) == 2); 5233 match(Set dst (MinReductionV src1 src2)); 5234 match(Set dst (MaxReductionV src1 src2)); 5235 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5236 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5237 ins_encode %{ 5238 assert(UseAVX > 0, "sanity"); 5239 5240 int opcode = this->ideal_Opcode(); 5241 int vlen = Matcher::vector_length(this, $src2); 5242 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5243 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5244 %} 5245 ins_pipe( pipe_slow ); 5246 %} 5247 5248 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5249 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5250 rFlagsReg cr) %{ 5251 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5252 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5253 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5254 Matcher::vector_length(n->in(2)) >= 4); 5255 match(Set dst (MinReductionV src1 src2)); 5256 match(Set dst (MaxReductionV src1 src2)); 5257 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5258 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5259 ins_encode %{ 5260 assert(UseAVX > 0, "sanity"); 5261 5262 int opcode = this->ideal_Opcode(); 5263 int vlen = Matcher::vector_length(this, $src2); 5264 __ reduceDoubleMinMax(opcode, vlen, false, 
$dst$$XMMRegister, $src2$$XMMRegister, 5265 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5266 %} 5267 ins_pipe( pipe_slow ); 5268 %} 5269 5270 5271 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5272 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5273 rFlagsReg cr) %{ 5274 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5275 Matcher::vector_length(n->in(2)) == 2); 5276 match(Set dst (MinReductionV dst src)); 5277 match(Set dst (MaxReductionV dst src)); 5278 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5279 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5280 ins_encode %{ 5281 assert(UseAVX > 0, "sanity"); 5282 5283 int opcode = this->ideal_Opcode(); 5284 int vlen = Matcher::vector_length(this, $src); 5285 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5286 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5287 %} 5288 ins_pipe( pipe_slow ); 5289 %} 5290 5291 instruct minmax_reductionD_av(legRegD dst, legVec src, 5292 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5293 rFlagsReg cr) %{ 5294 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5295 Matcher::vector_length(n->in(2)) >= 4); 5296 match(Set dst (MinReductionV dst src)); 5297 match(Set dst (MaxReductionV dst src)); 5298 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5299 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5300 ins_encode %{ 5301 assert(UseAVX > 0, "sanity"); 5302 5303 int opcode = this->ideal_Opcode(); 5304 int vlen = Matcher::vector_length(this, $src); 5305 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5306 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5307 %} 5308 ins_pipe( pipe_slow ); 5309 %} 5310 5311 // ====================VECTOR ARITHMETIC======================================= 5312 5313 // --------------------------------- ADD -------------------------------------- 5314 5315 // Bytes vector add 5316 instruct vaddB(vec dst, vec src) %{ 5317 predicate(UseAVX == 0); 5318 match(Set dst (AddVB dst src)); 5319 format %{ "paddb $dst,$src\t! add packedB" %} 5320 ins_encode %{ 5321 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5322 %} 5323 ins_pipe( pipe_slow ); 5324 %} 5325 5326 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5327 predicate(UseAVX > 0); 5328 match(Set dst (AddVB src1 src2)); 5329 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5330 ins_encode %{ 5331 int vlen_enc = vector_length_encoding(this); 5332 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5333 %} 5334 ins_pipe( pipe_slow ); 5335 %} 5336 5337 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5338 predicate((UseAVX > 0) && 5339 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5340 match(Set dst (AddVB src (LoadVector mem))); 5341 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5342 ins_encode %{ 5343 int vlen_enc = vector_length_encoding(this); 5344 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5345 %} 5346 ins_pipe( pipe_slow ); 5347 %} 5348 5349 // Shorts/Chars vector add 5350 instruct vaddS(vec dst, vec src) %{ 5351 predicate(UseAVX == 0); 5352 match(Set dst (AddVS dst src)); 5353 format %{ "paddw $dst,$src\t! 
add packedS" %} 5354 ins_encode %{ 5355 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5356 %} 5357 ins_pipe( pipe_slow ); 5358 %} 5359 5360 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5361 predicate(UseAVX > 0); 5362 match(Set dst (AddVS src1 src2)); 5363 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5364 ins_encode %{ 5365 int vlen_enc = vector_length_encoding(this); 5366 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5367 %} 5368 ins_pipe( pipe_slow ); 5369 %} 5370 5371 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5372 predicate((UseAVX > 0) && 5373 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5374 match(Set dst (AddVS src (LoadVector mem))); 5375 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5376 ins_encode %{ 5377 int vlen_enc = vector_length_encoding(this); 5378 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5379 %} 5380 ins_pipe( pipe_slow ); 5381 %} 5382 5383 // Integers vector add 5384 instruct vaddI(vec dst, vec src) %{ 5385 predicate(UseAVX == 0); 5386 match(Set dst (AddVI dst src)); 5387 format %{ "paddd $dst,$src\t! add packedI" %} 5388 ins_encode %{ 5389 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5395 predicate(UseAVX > 0); 5396 match(Set dst (AddVI src1 src2)); 5397 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5398 ins_encode %{ 5399 int vlen_enc = vector_length_encoding(this); 5400 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5401 %} 5402 ins_pipe( pipe_slow ); 5403 %} 5404 5405 5406 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5407 predicate((UseAVX > 0) && 5408 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5409 match(Set dst (AddVI src (LoadVector mem))); 5410 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5411 ins_encode %{ 5412 int vlen_enc = vector_length_encoding(this); 5413 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5414 %} 5415 ins_pipe( pipe_slow ); 5416 %} 5417 5418 // Longs vector add 5419 instruct vaddL(vec dst, vec src) %{ 5420 predicate(UseAVX == 0); 5421 match(Set dst (AddVL dst src)); 5422 format %{ "paddq $dst,$src\t! add packedL" %} 5423 ins_encode %{ 5424 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5425 %} 5426 ins_pipe( pipe_slow ); 5427 %} 5428 5429 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5430 predicate(UseAVX > 0); 5431 match(Set dst (AddVL src1 src2)); 5432 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5433 ins_encode %{ 5434 int vlen_enc = vector_length_encoding(this); 5435 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5436 %} 5437 ins_pipe( pipe_slow ); 5438 %} 5439 5440 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5441 predicate((UseAVX > 0) && 5442 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5443 match(Set dst (AddVL src (LoadVector mem))); 5444 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5445 ins_encode %{ 5446 int vlen_enc = vector_length_encoding(this); 5447 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5448 %} 5449 ins_pipe( pipe_slow ); 5450 %} 5451 5452 // Floats vector add 5453 instruct vaddF(vec dst, vec src) %{ 5454 predicate(UseAVX == 0); 5455 match(Set dst (AddVF dst src)); 5456 format %{ "addps $dst,$src\t! 
add packedF" %} 5457 ins_encode %{ 5458 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5459 %} 5460 ins_pipe( pipe_slow ); 5461 %} 5462 5463 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5464 predicate(UseAVX > 0); 5465 match(Set dst (AddVF src1 src2)); 5466 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5467 ins_encode %{ 5468 int vlen_enc = vector_length_encoding(this); 5469 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5470 %} 5471 ins_pipe( pipe_slow ); 5472 %} 5473 5474 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5475 predicate((UseAVX > 0) && 5476 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5477 match(Set dst (AddVF src (LoadVector mem))); 5478 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5479 ins_encode %{ 5480 int vlen_enc = vector_length_encoding(this); 5481 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5482 %} 5483 ins_pipe( pipe_slow ); 5484 %} 5485 5486 // Doubles vector add 5487 instruct vaddD(vec dst, vec src) %{ 5488 predicate(UseAVX == 0); 5489 match(Set dst (AddVD dst src)); 5490 format %{ "addpd $dst,$src\t! add packedD" %} 5491 ins_encode %{ 5492 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5498 predicate(UseAVX > 0); 5499 match(Set dst (AddVD src1 src2)); 5500 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5501 ins_encode %{ 5502 int vlen_enc = vector_length_encoding(this); 5503 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5504 %} 5505 ins_pipe( pipe_slow ); 5506 %} 5507 5508 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5509 predicate((UseAVX > 0) && 5510 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5511 match(Set dst (AddVD src (LoadVector mem))); 5512 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5513 ins_encode %{ 5514 int vlen_enc = vector_length_encoding(this); 5515 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5516 %} 5517 ins_pipe( pipe_slow ); 5518 %} 5519 5520 // --------------------------------- SUB -------------------------------------- 5521 5522 // Bytes vector sub 5523 instruct vsubB(vec dst, vec src) %{ 5524 predicate(UseAVX == 0); 5525 match(Set dst (SubVB dst src)); 5526 format %{ "psubb $dst,$src\t! sub packedB" %} 5527 ins_encode %{ 5528 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5529 %} 5530 ins_pipe( pipe_slow ); 5531 %} 5532 5533 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5534 predicate(UseAVX > 0); 5535 match(Set dst (SubVB src1 src2)); 5536 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5537 ins_encode %{ 5538 int vlen_enc = vector_length_encoding(this); 5539 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5540 %} 5541 ins_pipe( pipe_slow ); 5542 %} 5543 5544 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5545 predicate((UseAVX > 0) && 5546 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5547 match(Set dst (SubVB src (LoadVector mem))); 5548 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5549 ins_encode %{ 5550 int vlen_enc = vector_length_encoding(this); 5551 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5552 %} 5553 ins_pipe( pipe_slow ); 5554 %} 5555 5556 // Shorts/Chars vector sub 5557 instruct vsubS(vec dst, vec src) %{ 5558 predicate(UseAVX == 0); 5559 match(Set dst (SubVS dst src)); 5560 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5561 ins_encode %{ 5562 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5563 %} 5564 ins_pipe( pipe_slow ); 5565 %} 5566 5567 5568 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5569 predicate(UseAVX > 0); 5570 match(Set dst (SubVS src1 src2)); 5571 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5572 ins_encode %{ 5573 int vlen_enc = vector_length_encoding(this); 5574 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5575 %} 5576 ins_pipe( pipe_slow ); 5577 %} 5578 5579 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5580 predicate((UseAVX > 0) && 5581 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5582 match(Set dst (SubVS src (LoadVector mem))); 5583 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5584 ins_encode %{ 5585 int vlen_enc = vector_length_encoding(this); 5586 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5587 %} 5588 ins_pipe( pipe_slow ); 5589 %} 5590 5591 // Integers vector sub 5592 instruct vsubI(vec dst, vec src) %{ 5593 predicate(UseAVX == 0); 5594 match(Set dst (SubVI dst src)); 5595 format %{ "psubd $dst,$src\t! sub packedI" %} 5596 ins_encode %{ 5597 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5598 %} 5599 ins_pipe( pipe_slow ); 5600 %} 5601 5602 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5603 predicate(UseAVX > 0); 5604 match(Set dst (SubVI src1 src2)); 5605 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5606 ins_encode %{ 5607 int vlen_enc = vector_length_encoding(this); 5608 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5609 %} 5610 ins_pipe( pipe_slow ); 5611 %} 5612 5613 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5614 predicate((UseAVX > 0) && 5615 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5616 match(Set dst (SubVI src (LoadVector mem))); 5617 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5618 ins_encode %{ 5619 int vlen_enc = vector_length_encoding(this); 5620 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5621 %} 5622 ins_pipe( pipe_slow ); 5623 %} 5624 5625 // Longs vector sub 5626 instruct vsubL(vec dst, vec src) %{ 5627 predicate(UseAVX == 0); 5628 match(Set dst (SubVL dst src)); 5629 format %{ "psubq $dst,$src\t! sub packedL" %} 5630 ins_encode %{ 5631 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5632 %} 5633 ins_pipe( pipe_slow ); 5634 %} 5635 5636 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5637 predicate(UseAVX > 0); 5638 match(Set dst (SubVL src1 src2)); 5639 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5640 ins_encode %{ 5641 int vlen_enc = vector_length_encoding(this); 5642 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5643 %} 5644 ins_pipe( pipe_slow ); 5645 %} 5646 5647 5648 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5649 predicate((UseAVX > 0) && 5650 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5651 match(Set dst (SubVL src (LoadVector mem))); 5652 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5653 ins_encode %{ 5654 int vlen_enc = vector_length_encoding(this); 5655 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5656 %} 5657 ins_pipe( pipe_slow ); 5658 %} 5659 5660 // Floats vector sub 5661 instruct vsubF(vec dst, vec src) %{ 5662 predicate(UseAVX == 0); 5663 match(Set dst (SubVF dst src)); 5664 format %{ "subps $dst,$src\t! 
sub packedF" %} 5665 ins_encode %{ 5666 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5667 %} 5668 ins_pipe( pipe_slow ); 5669 %} 5670 5671 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5672 predicate(UseAVX > 0); 5673 match(Set dst (SubVF src1 src2)); 5674 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5675 ins_encode %{ 5676 int vlen_enc = vector_length_encoding(this); 5677 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5678 %} 5679 ins_pipe( pipe_slow ); 5680 %} 5681 5682 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5683 predicate((UseAVX > 0) && 5684 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5685 match(Set dst (SubVF src (LoadVector mem))); 5686 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5687 ins_encode %{ 5688 int vlen_enc = vector_length_encoding(this); 5689 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 // Doubles vector sub 5695 instruct vsubD(vec dst, vec src) %{ 5696 predicate(UseAVX == 0); 5697 match(Set dst (SubVD dst src)); 5698 format %{ "subpd $dst,$src\t! sub packedD" %} 5699 ins_encode %{ 5700 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5701 %} 5702 ins_pipe( pipe_slow ); 5703 %} 5704 5705 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5706 predicate(UseAVX > 0); 5707 match(Set dst (SubVD src1 src2)); 5708 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5709 ins_encode %{ 5710 int vlen_enc = vector_length_encoding(this); 5711 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5712 %} 5713 ins_pipe( pipe_slow ); 5714 %} 5715 5716 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5717 predicate((UseAVX > 0) && 5718 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5719 match(Set dst (SubVD src (LoadVector mem))); 5720 format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %} 5721 ins_encode %{ 5722 int vlen_enc = vector_length_encoding(this); 5723 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5724 %} 5725 ins_pipe( pipe_slow ); 5726 %} 5727 5728 // --------------------------------- MUL -------------------------------------- 5729 5730 // Byte vector mul 5731 instruct vmul8B(vec dst, vec src1, vec src2, vec xtmp) %{ 5732 predicate(Matcher::vector_length_in_bytes(n) <= 8); 5733 match(Set dst (MulVB src1 src2)); 5734 effect(TEMP dst, TEMP xtmp); 5735 format %{ "mulVB $dst, $src1, $src2\t! using $xtmp as TEMP" %} 5736 ins_encode %{ 5737 assert(UseSSE > 3, "required"); 5738 __ pmovsxbw($dst$$XMMRegister, $src1$$XMMRegister); 5739 __ pmovsxbw($xtmp$$XMMRegister, $src2$$XMMRegister); 5740 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5741 __ psllw($dst$$XMMRegister, 8); 5742 __ psrlw($dst$$XMMRegister, 8); 5743 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5744 %} 5745 ins_pipe( pipe_slow ); 5746 %} 5747 5748 instruct vmulB(vec dst, vec src1, vec src2, vec xtmp) %{ 5749 predicate(UseAVX == 0 && Matcher::vector_length_in_bytes(n) > 8); 5750 match(Set dst (MulVB src1 src2)); 5751 effect(TEMP dst, TEMP xtmp); 5752 format %{ "mulVB $dst, $src1, $src2\t! 
using $xtmp as TEMP" %} 5753 ins_encode %{ 5754 assert(UseSSE > 3, "required"); 5755 // Odd-index elements 5756 __ movdqu($dst$$XMMRegister, $src1$$XMMRegister); 5757 __ psrlw($dst$$XMMRegister, 8); 5758 __ movdqu($xtmp$$XMMRegister, $src2$$XMMRegister); 5759 __ psrlw($xtmp$$XMMRegister, 8); 5760 __ pmullw($dst$$XMMRegister, $xtmp$$XMMRegister); 5761 __ psllw($dst$$XMMRegister, 8); 5762 // Even-index elements 5763 __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister); 5764 __ pmullw($xtmp$$XMMRegister, $src2$$XMMRegister); 5765 __ psllw($xtmp$$XMMRegister, 8); 5766 __ psrlw($xtmp$$XMMRegister, 8); 5767 // Combine 5768 __ por($dst$$XMMRegister, $xtmp$$XMMRegister); 5769 %} 5770 ins_pipe( pipe_slow ); 5771 %} 5772 5773 instruct vmulB_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{ 5774 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) > 8); 5775 match(Set dst (MulVB src1 src2)); 5776 effect(TEMP xtmp1, TEMP xtmp2); 5777 format %{ "vmulVB $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %} 5778 ins_encode %{ 5779 int vlen_enc = vector_length_encoding(this); 5780 // Odd-index elements 5781 __ vpsrlw($xtmp2$$XMMRegister, $src1$$XMMRegister, 8, vlen_enc); 5782 __ vpsrlw($xtmp1$$XMMRegister, $src2$$XMMRegister, 8, vlen_enc); 5783 __ vpmullw($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5784 __ vpsllw($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 8, vlen_enc); 5785 // Even-index elements 5786 __ vpmullw($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5787 __ vpsllw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5788 __ vpsrlw($xtmp1$$XMMRegister, $xtmp1$$XMMRegister, 8, vlen_enc); 5789 // Combine 5790 __ vpor($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc); 5791 %} 5792 ins_pipe( pipe_slow ); 5793 %} 5794 5795 // Shorts/Chars vector mul 5796 instruct vmulS(vec dst, vec src) %{ 5797 predicate(UseAVX == 0); 5798 match(Set dst (MulVS dst src)); 5799 format %{ "pmullw $dst,$src\t! mul packedS" %} 5800 ins_encode %{ 5801 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5802 %} 5803 ins_pipe( pipe_slow ); 5804 %} 5805 5806 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5807 predicate(UseAVX > 0); 5808 match(Set dst (MulVS src1 src2)); 5809 format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %} 5810 ins_encode %{ 5811 int vlen_enc = vector_length_encoding(this); 5812 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5813 %} 5814 ins_pipe( pipe_slow ); 5815 %} 5816 5817 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5818 predicate((UseAVX > 0) && 5819 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5820 match(Set dst (MulVS src (LoadVector mem))); 5821 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5822 ins_encode %{ 5823 int vlen_enc = vector_length_encoding(this); 5824 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5825 %} 5826 ins_pipe( pipe_slow ); 5827 %} 5828 5829 // Integers vector mul 5830 instruct vmulI(vec dst, vec src) %{ 5831 predicate(UseAVX == 0); 5832 match(Set dst (MulVI dst src)); 5833 format %{ "pmulld $dst,$src\t! mul packedI" %} 5834 ins_encode %{ 5835 assert(UseSSE > 3, "required"); 5836 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5837 %} 5838 ins_pipe( pipe_slow ); 5839 %} 5840 5841 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5842 predicate(UseAVX > 0); 5843 match(Set dst (MulVI src1 src2)); 5844 format %{ "vpmulld $dst,$src1,$src2\t! 
mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct evmulL_reg(vec dst, vec src1, vec src2) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            VM_Version::supports_avx512vldq());
  match(Set dst (MulVL src1 src2));
  format %{ "evpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct evmulL_mem(vec dst, vec src, memory mem) %{
  predicate((Matcher::vector_length_in_bytes(n) == 64 &&
             VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length_in_bytes(n) > 8 &&
             VM_Version::supports_avx512vldq()));
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "evpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL(vec dst, vec src1, vec src2, vec xtmp) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVL src1 src2));
  effect(TEMP dst, TEMP xtmp);
  format %{ "mulVL $dst, $src1, $src2\t! using $xtmp as TEMP" %}
  ins_encode %{
    assert(VM_Version::supports_sse4_1(), "required");
    // 64x64->64 multiply built from 32-bit pieces:
    //   a*b mod 2^64 == lo(a)*lo(b) + ((lo(a)*hi(b) + hi(a)*lo(b)) << 32)
    // Get the lo-hi cross products; only their lower 32 bits are needed.
    __ pshufd($xtmp$$XMMRegister, $src2$$XMMRegister, 0xB1);
    __ pmulld($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pshufd($dst$$XMMRegister, $xtmp$$XMMRegister, 0xB1);
    __ paddd($dst$$XMMRegister, $xtmp$$XMMRegister);
    __ psllq($dst$$XMMRegister, 32);
    // Get the lo-lo products
    __ movdqu($xtmp$$XMMRegister, $src1$$XMMRegister);
    __ pmuludq($xtmp$$XMMRegister, $src2$$XMMRegister);
    __ paddq($dst$$XMMRegister, $xtmp$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_reg(vec dst, vec src1, vec src2, vec xtmp1, vec xtmp2) %{
  predicate(UseAVX > 0 &&
            ((Matcher::vector_length_in_bytes(n) == 64 &&
              !VM_Version::supports_avx512dq()) ||
             (Matcher::vector_length_in_bytes(n) < 64 &&
              !VM_Version::supports_avx512vldq())));
  match(Set dst (MulVL src1 src2));
  effect(TEMP xtmp1, TEMP xtmp2);
  format %{ "vmulVL $dst, $src1, $src2\t! using $xtmp1, $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // Get the lo-hi cross products; only their lower 32 bits are needed
    // (same decomposition as vmulL above).
    __ vpshufd($xtmp1$$XMMRegister, $src2$$XMMRegister, 0xB1, vlen_enc);
    __ vpmulld($xtmp1$$XMMRegister, $src1$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpshufd($xtmp2$$XMMRegister, $xtmp1$$XMMRegister, 0xB1, vlen_enc);
    __ vpaddd($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, $xtmp1$$XMMRegister, vlen_enc);
    __ vpsllq($xtmp2$$XMMRegister, $xtmp2$$XMMRegister, 32, vlen_enc);
    // Get the lo-lo products
    __ vpmuludq($xtmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpaddq($dst$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate((UseAVX > 0) &&
            (Matcher::vector_length_in_bytes(n->in(1)) > 8));
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(Matcher::vector_length(n) == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst !
vcmovevf\n\t" 6013 %} 6014 ins_encode %{ 6015 assert(UseAVX > 0, "required"); 6016 6017 int vlen_enc = Assembler::AVX_256bit; 6018 int cond = (Assembler::Condition)($copnd$$cmpcode); 6019 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 6020 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6021 %} 6022 ins_pipe( pipe_slow ); 6023 %} 6024 6025 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 6026 predicate(Matcher::vector_length(n) == 4); 6027 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 6028 effect(TEMP dst, USE src1, USE src2); 6029 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 6030 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 6031 %} 6032 ins_encode %{ 6033 assert(UseAVX > 0, "required"); 6034 6035 int vlen_enc = Assembler::AVX_256bit; 6036 int cond = (Assembler::Condition)($copnd$$cmpcode); 6037 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 6038 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6039 %} 6040 ins_pipe( pipe_slow ); 6041 %} 6042 6043 // --------------------------------- DIV -------------------------------------- 6044 6045 // Floats vector div 6046 instruct vdivF(vec dst, vec src) %{ 6047 predicate(UseAVX == 0); 6048 match(Set dst (DivVF dst src)); 6049 format %{ "divps $dst,$src\t! div packedF" %} 6050 ins_encode %{ 6051 __ divps($dst$$XMMRegister, $src$$XMMRegister); 6052 %} 6053 ins_pipe( pipe_slow ); 6054 %} 6055 6056 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 6057 predicate(UseAVX > 0); 6058 match(Set dst (DivVF src1 src2)); 6059 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 6060 ins_encode %{ 6061 int vlen_enc = vector_length_encoding(this); 6062 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6063 %} 6064 ins_pipe( pipe_slow ); 6065 %} 6066 6067 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 6068 predicate((UseAVX > 0) && 6069 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6070 match(Set dst (DivVF src (LoadVector mem))); 6071 format %{ "vdivps $dst,$src,$mem\t! div packedF" %} 6072 ins_encode %{ 6073 int vlen_enc = vector_length_encoding(this); 6074 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6075 %} 6076 ins_pipe( pipe_slow ); 6077 %} 6078 6079 // Doubles vector div 6080 instruct vdivD(vec dst, vec src) %{ 6081 predicate(UseAVX == 0); 6082 match(Set dst (DivVD dst src)); 6083 format %{ "divpd $dst,$src\t! div packedD" %} 6084 ins_encode %{ 6085 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 6086 %} 6087 ins_pipe( pipe_slow ); 6088 %} 6089 6090 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 6091 predicate(UseAVX > 0); 6092 match(Set dst (DivVD src1 src2)); 6093 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 6094 ins_encode %{ 6095 int vlen_enc = vector_length_encoding(this); 6096 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6097 %} 6098 ins_pipe( pipe_slow ); 6099 %} 6100 6101 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 6102 predicate((UseAVX > 0) && 6103 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6104 match(Set dst (DivVD src (LoadVector mem))); 6105 format %{ "vdivpd $dst,$src,$mem\t! 
div packedD" %} 6106 ins_encode %{ 6107 int vlen_enc = vector_length_encoding(this); 6108 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6109 %} 6110 ins_pipe( pipe_slow ); 6111 %} 6112 6113 // ------------------------------ MinMax --------------------------------------- 6114 6115 // Byte, Short, Int vector Min/Max 6116 instruct minmax_reg_sse(vec dst, vec src) %{ 6117 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6118 UseAVX == 0); 6119 match(Set dst (MinV dst src)); 6120 match(Set dst (MaxV dst src)); 6121 format %{ "vector_minmax $dst,$src\t! " %} 6122 ins_encode %{ 6123 assert(UseSSE >= 4, "required"); 6124 6125 int opcode = this->ideal_Opcode(); 6126 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6127 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 6128 %} 6129 ins_pipe( pipe_slow ); 6130 %} 6131 6132 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 6133 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 6134 UseAVX > 0); 6135 match(Set dst (MinV src1 src2)); 6136 match(Set dst (MaxV src1 src2)); 6137 format %{ "vector_minmax $dst,$src1,$src2\t! " %} 6138 ins_encode %{ 6139 int opcode = this->ideal_Opcode(); 6140 int vlen_enc = vector_length_encoding(this); 6141 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6142 6143 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6144 %} 6145 ins_pipe( pipe_slow ); 6146 %} 6147 6148 // Long vector Min/Max 6149 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 6150 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 6151 UseAVX == 0); 6152 match(Set dst (MinV dst src)); 6153 match(Set dst (MaxV src dst)); 6154 effect(TEMP dst, TEMP tmp); 6155 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 6156 ins_encode %{ 6157 assert(UseSSE >= 4, "required"); 6158 6159 int opcode = this->ideal_Opcode(); 6160 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6161 assert(elem_bt == T_LONG, "sanity"); 6162 6163 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 6164 %} 6165 ins_pipe( pipe_slow ); 6166 %} 6167 6168 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 6169 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 6170 UseAVX > 0 && !VM_Version::supports_avx512vl()); 6171 match(Set dst (MinV src1 src2)); 6172 match(Set dst (MaxV src1 src2)); 6173 effect(TEMP dst); 6174 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 6175 ins_encode %{ 6176 int vlen_enc = vector_length_encoding(this); 6177 int opcode = this->ideal_Opcode(); 6178 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6179 assert(elem_bt == T_LONG, "sanity"); 6180 6181 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6182 %} 6183 ins_pipe( pipe_slow ); 6184 %} 6185 6186 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6187 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6188 Matcher::vector_element_basic_type(n) == T_LONG); 6189 match(Set dst (MinV src1 src2)); 6190 match(Set dst (MaxV src1 src2)); 6191 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6192 ins_encode %{ 6193 assert(UseAVX > 2, "required"); 6194 6195 int vlen_enc = vector_length_encoding(this); 6196 int opcode = this->ideal_Opcode(); 6197 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6198 assert(elem_bt == T_LONG, "sanity"); 6199 6200 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6201 %} 6202 ins_pipe( pipe_slow ); 6203 %} 6204 6205 // Float/Double vector Min/Max 6206 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6207 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6208 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6209 UseAVX > 0); 6210 match(Set dst (MinV a b)); 6211 match(Set dst (MaxV a b)); 6212 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6213 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6214 ins_encode %{ 6215 assert(UseAVX > 0, "required"); 6216 6217 int opcode = this->ideal_Opcode(); 6218 int vlen_enc = vector_length_encoding(this); 6219 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6220 6221 __ vminmax_fp(opcode, elem_bt, 6222 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6223 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6224 %} 6225 ins_pipe( pipe_slow ); 6226 %} 6227 6228 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6229 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6230 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6231 match(Set dst (MinV a b)); 6232 match(Set dst (MaxV a b)); 6233 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6234 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6235 ins_encode %{ 6236 assert(UseAVX > 2, "required"); 6237 6238 int opcode = this->ideal_Opcode(); 6239 int vlen_enc = vector_length_encoding(this); 6240 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6241 6242 __ evminmax_fp(opcode, elem_bt, 6243 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6244 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6245 %} 6246 ins_pipe( pipe_slow ); 6247 %} 6248 6249 // --------------------------------- Signum/CopySign --------------------------- 6250 6251 instruct signumF_reg(regF dst, regF zero, regF one, rFlagsReg cr) %{ 6252 match(Set dst (SignumF dst (Binary zero one))); 6253 effect(KILL cr); 6254 format %{ "signumF $dst, $dst" %} 6255 ins_encode %{ 6256 int opcode = this->ideal_Opcode(); 6257 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6258 %} 6259 ins_pipe( pipe_slow ); 6260 %} 6261 6262 instruct signumD_reg(regD dst, regD zero, regD one, rFlagsReg cr) %{ 6263 match(Set dst (SignumD dst (Binary zero one))); 6264 effect(KILL cr); 6265 format %{ "signumD $dst, $dst" %} 6266 ins_encode %{ 6267 int opcode = this->ideal_Opcode(); 6268 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister); 6269 %} 6270 ins_pipe( pipe_slow ); 6271 %} 6272 6273 instruct signumV_reg_avx(vec dst, vec src, vec zero, vec one, vec xtmp1) %{ 6274 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 6275 match(Set dst (SignumVF src (Binary zero one))); 6276 match(Set dst (SignumVD src (Binary zero one))); 6277 effect(TEMP dst, TEMP xtmp1); 6278 format %{ "vector_signum_avx $dst, $src\t! 
using $xtmp1 as TEMP" %} 6279 ins_encode %{ 6280 int opcode = this->ideal_Opcode(); 6281 int vec_enc = vector_length_encoding(this); 6282 __ vector_signum_avx(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6283 $xtmp1$$XMMRegister, vec_enc); 6284 %} 6285 ins_pipe( pipe_slow ); 6286 %} 6287 6288 instruct signumV_reg_evex(vec dst, vec src, vec zero, vec one, kReg ktmp1) %{ 6289 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 6290 match(Set dst (SignumVF src (Binary zero one))); 6291 match(Set dst (SignumVD src (Binary zero one))); 6292 effect(TEMP dst, TEMP ktmp1); 6293 format %{ "vector_signum_evex $dst, $src\t! using $ktmp1 as TEMP" %} 6294 ins_encode %{ 6295 int opcode = this->ideal_Opcode(); 6296 int vec_enc = vector_length_encoding(this); 6297 __ vector_signum_evex(opcode, $dst$$XMMRegister, $src$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, 6298 $ktmp1$$KRegister, vec_enc); 6299 %} 6300 ins_pipe( pipe_slow ); 6301 %} 6302 6303 // --------------------------------------- 6304 // For copySign, use 0xE4 as the imm8 truth-table selector for vpternlog 6305 // Desired Truth Table: A -> xmm0 bit, B -> xmm1 bit, C -> xmm2 bit 6306 // C (xmm2) is set to 0x7FFFFFFF 6307 // Wherever xmm2 is 0 (the sign bit), we want to pick from B (the sign operand) 6308 // Wherever xmm2 is 1 (the magnitude bits), we want to pick from A (the magnitude operand) 6309 // 6310 // A B C Result 6311 // 0 0 0 0 6312 // 0 0 1 0 6313 // 0 1 0 1 6314 // 0 1 1 0 6315 // 1 0 0 0 6316 // 1 0 1 1 6317 // 1 1 0 1 6318 // 1 1 1 1 6319 // 6320 // Reading the Result column from the high bit (A=1,B=1,C=1) down to the low bit (A=0,B=0,C=0) gives 0b11100100 = 0xE4 6321 // --------------------------------------- 6322 6323 #ifdef _LP64 6324 instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{ 6325 match(Set dst (CopySignF dst src)); 6326 effect(TEMP tmp1, TEMP tmp2); 6327 format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6328 ins_encode %{ 6329 __ movl($tmp2$$Register, 0x7FFFFFFF); 6330 __ movdl($tmp1$$XMMRegister, $tmp2$$Register); 6331 __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6332 %} 6333 ins_pipe( pipe_slow ); 6334 %} 6335 6336 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{ 6337 match(Set dst (CopySignD dst (Binary src zero))); 6338 ins_cost(100); 6339 effect(TEMP tmp1, TEMP tmp2); 6340 format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %} 6341 ins_encode %{ 6342 __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF); 6343 __ movq($tmp1$$XMMRegister, $tmp2$$Register); 6344 __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit); 6345 %} 6346 ins_pipe( pipe_slow ); 6347 %} 6348 6349 #endif // _LP64 6350 6351 //----------------------------- CompressBits/ExpandBits ------------------------ 6352 6353 instruct compressBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6354 predicate(n->bottom_type()->isa_int()); 6355 match(Set dst (CompressBits src mask)); 6356 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6357 ins_encode %{ 6358 __ pextl($dst$$Register, $src$$Register, $mask$$Register); 6359 %} 6360 ins_pipe( pipe_slow ); 6361 %} 6362 6363 instruct expandBitsI_reg(rRegI dst, rRegI src, rRegI mask) %{ 6364 predicate(n->bottom_type()->isa_int()); 6365 match(Set dst (ExpandBits src mask)); 6366 format %{ "pdepl $dst, $src, $mask\t! 
parallel bit deposit" %} 6367 ins_encode %{ 6368 __ pdepl($dst$$Register, $src$$Register, $mask$$Register); 6369 %} 6370 ins_pipe( pipe_slow ); 6371 %} 6372 6373 instruct compressBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6374 predicate(n->bottom_type()->isa_int()); 6375 match(Set dst (CompressBits src (LoadI mask))); 6376 format %{ "pextl $dst, $src, $mask\t! parallel bit extract" %} 6377 ins_encode %{ 6378 __ pextl($dst$$Register, $src$$Register, $mask$$Address); 6379 %} 6380 ins_pipe( pipe_slow ); 6381 %} 6382 6383 instruct expandBitsI_mem(rRegI dst, rRegI src, memory mask) %{ 6384 predicate(n->bottom_type()->isa_int()); 6385 match(Set dst (ExpandBits src (LoadI mask))); 6386 format %{ "pdepl $dst, $src, $mask\t! parallel bit deposit" %} 6387 ins_encode %{ 6388 __ pdepl($dst$$Register, $src$$Register, $mask$$Address); 6389 %} 6390 ins_pipe( pipe_slow ); 6391 %} 6392 6393 // --------------------------------- Sqrt -------------------------------------- 6394 6395 instruct vsqrtF_reg(vec dst, vec src) %{ 6396 match(Set dst (SqrtVF src)); 6397 format %{ "vsqrtps $dst,$src\t! sqrt packedF" %} 6398 ins_encode %{ 6399 assert(UseAVX > 0, "required"); 6400 int vlen_enc = vector_length_encoding(this); 6401 __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6402 %} 6403 ins_pipe( pipe_slow ); 6404 %} 6405 6406 instruct vsqrtF_mem(vec dst, memory mem) %{ 6407 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6408 match(Set dst (SqrtVF (LoadVector mem))); 6409 format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %} 6410 ins_encode %{ 6411 assert(UseAVX > 0, "required"); 6412 int vlen_enc = vector_length_encoding(this); 6413 __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc); 6414 %} 6415 ins_pipe( pipe_slow ); 6416 %} 6417 6418 // Floating point vector sqrt 6419 instruct vsqrtD_reg(vec dst, vec src) %{ 6420 match(Set dst (SqrtVD src)); 6421 format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %} 6422 ins_encode %{ 6423 assert(UseAVX > 0, "required"); 6424 int vlen_enc = vector_length_encoding(this); 6425 __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6426 %} 6427 ins_pipe( pipe_slow ); 6428 %} 6429 6430 instruct vsqrtD_mem(vec dst, memory mem) %{ 6431 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 6432 match(Set dst (SqrtVD (LoadVector mem))); 6433 format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %} 6434 ins_encode %{ 6435 assert(UseAVX > 0, "required"); 6436 int vlen_enc = vector_length_encoding(this); 6437 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6438 %} 6439 ins_pipe( pipe_slow ); 6440 %} 6441 6442 // ------------------------------ Shift --------------------------------------- 6443 6444 // Left and right shift count vectors are the same on x86 6445 // (only lowest bits of xmm reg are used for count). 6446 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6447 match(Set dst (LShiftCntV cnt)); 6448 match(Set dst (RShiftCntV cnt)); 6449 format %{ "movdl $dst,$cnt\t! 
load shift count" %} 6450 ins_encode %{ 6451 __ movdl($dst$$XMMRegister, $cnt$$Register); 6452 %} 6453 ins_pipe( pipe_slow ); 6454 %} 6455 6456 // Byte vector shift 6457 instruct vshiftB(vec dst, vec src, vec shift, vec tmp) %{ 6458 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6459 match(Set dst ( LShiftVB src shift)); 6460 match(Set dst ( RShiftVB src shift)); 6461 match(Set dst (URShiftVB src shift)); 6462 effect(TEMP dst, USE src, USE shift, TEMP tmp); 6463 format %{"vector_byte_shift $dst,$src,$shift" %} 6464 ins_encode %{ 6465 assert(UseSSE > 3, "required"); 6466 int opcode = this->ideal_Opcode(); 6467 bool sign = (opcode != Op_URShiftVB); 6468 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6469 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6470 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6471 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6472 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6473 %} 6474 ins_pipe( pipe_slow ); 6475 %} 6476 6477 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6478 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6479 UseAVX <= 1); 6480 match(Set dst ( LShiftVB src shift)); 6481 match(Set dst ( RShiftVB src shift)); 6482 match(Set dst (URShiftVB src shift)); 6483 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2); 6484 format %{"vector_byte_shift $dst,$src,$shift" %} 6485 ins_encode %{ 6486 assert(UseSSE > 3, "required"); 6487 int opcode = this->ideal_Opcode(); 6488 bool sign = (opcode != Op_URShiftVB); 6489 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6490 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6491 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6492 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6493 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6494 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6495 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6496 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6497 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6498 %} 6499 ins_pipe( pipe_slow ); 6500 %} 6501 6502 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6503 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6504 UseAVX > 1); 6505 match(Set dst ( LShiftVB src shift)); 6506 match(Set dst ( RShiftVB src shift)); 6507 match(Set dst (URShiftVB src shift)); 6508 effect(TEMP dst, TEMP tmp); 6509 format %{"vector_byte_shift $dst,$src,$shift" %} 6510 ins_encode %{ 6511 int opcode = this->ideal_Opcode(); 6512 bool sign = (opcode != Op_URShiftVB); 6513 int vlen_enc = Assembler::AVX_256bit; 6514 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 6515 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6516 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6517 __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister); 6518 __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0); 6519 %} 6520 ins_pipe( pipe_slow ); 6521 %} 6522 6523 instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp) %{ 6524 predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift()); 6525 match(Set dst ( LShiftVB src shift)); 6526 match(Set dst ( RShiftVB src shift)); 6527 match(Set dst (URShiftVB src shift)); 6528 effect(TEMP 
dst, TEMP tmp); 6529 format %{"vector_byte_shift $dst,$src,$shift" %} 6530 ins_encode %{ 6531 assert(UseAVX > 1, "required"); 6532 int opcode = this->ideal_Opcode(); 6533 bool sign = (opcode != Op_URShiftVB); 6534 int vlen_enc = Assembler::AVX_256bit; 6535 __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister); 6536 __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6537 __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6538 __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6539 __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6540 __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6541 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 6542 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6543 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6544 %} 6545 ins_pipe( pipe_slow ); 6546 %} 6547 6548 instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2) %{ 6549 predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift()); 6550 match(Set dst ( LShiftVB src shift)); 6551 match(Set dst (RShiftVB src shift)); 6552 match(Set dst (URShiftVB src shift)); 6553 effect(TEMP dst, TEMP tmp1, TEMP tmp2); 6554 format %{"vector_byte_shift $dst,$src,$shift" %} 6555 ins_encode %{ 6556 assert(UseAVX > 2, "required"); 6557 int opcode = this->ideal_Opcode(); 6558 bool sign = (opcode != Op_URShiftVB); 6559 int vlen_enc = Assembler::AVX_512bit; 6560 __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1); 6561 __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 6562 __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6563 __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6564 __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6565 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), noreg); 6566 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6567 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6568 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6569 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 6570 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, noreg); 6571 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6572 %} 6573 ins_pipe( pipe_slow ); 6574 %} 6575 6576 // A short vector logical right shift would produce an incorrect Java result 6577 // for negative data, because Java code converts a short value to an int with 6578 // sign extension before shifting. Char vectors are fine, since chars are 6579 // unsigned values. 6580 // Shorts/Chars vector shift 6581 instruct vshiftS(vec dst, vec src, vec shift) %{ 6582 predicate(!n->as_ShiftV()->is_var_shift()); 6583 match(Set dst ( LShiftVS src shift)); 6584 match(Set dst ( RShiftVS src shift)); 6585 match(Set dst (URShiftVS src shift)); 6586 effect(TEMP dst, USE src, USE shift); 6587 format %{ "vshiftw $dst,$src,$shift\t! 
shift packedS" %} 6588 ins_encode %{ 6589 int opcode = this->ideal_Opcode(); 6590 if (UseAVX > 0) { 6591 int vlen_enc = vector_length_encoding(this); 6592 __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6593 } else { 6594 int vlen = Matcher::vector_length(this); 6595 if (vlen == 2) { 6596 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 6597 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6598 } else if (vlen == 4) { 6599 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6600 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6601 } else { 6602 assert (vlen == 8, "sanity"); 6603 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6604 __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6605 } 6606 } 6607 %} 6608 ins_pipe( pipe_slow ); 6609 %} 6610 6611 // Integers vector left shift 6612 instruct vshiftI(vec dst, vec src, vec shift) %{ 6613 predicate(!n->as_ShiftV()->is_var_shift()); 6614 match(Set dst ( LShiftVI src shift)); 6615 match(Set dst ( RShiftVI src shift)); 6616 match(Set dst (URShiftVI src shift)); 6617 effect(TEMP dst, USE src, USE shift); 6618 format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %} 6619 ins_encode %{ 6620 int opcode = this->ideal_Opcode(); 6621 if (UseAVX > 0) { 6622 int vlen_enc = vector_length_encoding(this); 6623 __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6624 } else { 6625 int vlen = Matcher::vector_length(this); 6626 if (vlen == 2) { 6627 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6628 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6629 } else { 6630 assert(vlen == 4, "sanity"); 6631 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6632 __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6633 } 6634 } 6635 %} 6636 ins_pipe( pipe_slow ); 6637 %} 6638 6639 // Integers vector left constant shift 6640 instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{ 6641 match(Set dst (LShiftVI src (LShiftCntV shift))); 6642 match(Set dst (RShiftVI src (RShiftCntV shift))); 6643 match(Set dst (URShiftVI src (RShiftCntV shift))); 6644 format %{ "vshiftd_imm $dst,$src,$shift\t! shift packedI" %} 6645 ins_encode %{ 6646 int opcode = this->ideal_Opcode(); 6647 if (UseAVX > 0) { 6648 int vector_len = vector_length_encoding(this); 6649 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6650 } else { 6651 int vlen = Matcher::vector_length(this); 6652 if (vlen == 2) { 6653 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6654 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6655 } else { 6656 assert(vlen == 4, "sanity"); 6657 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6658 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6659 } 6660 } 6661 %} 6662 ins_pipe( pipe_slow ); 6663 %} 6664 6665 // Longs vector shift 6666 instruct vshiftL(vec dst, vec src, vec shift) %{ 6667 predicate(!n->as_ShiftV()->is_var_shift()); 6668 match(Set dst ( LShiftVL src shift)); 6669 match(Set dst (URShiftVL src shift)); 6670 effect(TEMP dst, USE src, USE shift); 6671 format %{ "vshiftq $dst,$src,$shift\t! 
shift packedL" %} 6672 ins_encode %{ 6673 int opcode = this->ideal_Opcode(); 6674 if (UseAVX > 0) { 6675 int vlen_enc = vector_length_encoding(this); 6676 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6677 } else { 6678 assert(Matcher::vector_length(this) == 2, ""); 6679 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6680 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6681 } 6682 %} 6683 ins_pipe( pipe_slow ); 6684 %} 6685 6686 // Longs vector constant shift 6687 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6688 match(Set dst (LShiftVL src (LShiftCntV shift))); 6689 match(Set dst (URShiftVL src (RShiftCntV shift))); 6690 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6691 ins_encode %{ 6692 int opcode = this->ideal_Opcode(); 6693 if (UseAVX > 0) { 6694 int vector_len = vector_length_encoding(this); 6695 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6696 } else { 6697 assert(Matcher::vector_length(this) == 2, ""); 6698 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6699 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6700 } 6701 %} 6702 ins_pipe( pipe_slow ); 6703 %} 6704 6705 // -------------------ArithmeticRightShift ----------------------------------- 6706 // Long vector arithmetic right shift 6707 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp) %{ 6708 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6709 match(Set dst (RShiftVL src shift)); 6710 effect(TEMP dst, TEMP tmp); 6711 format %{ "vshiftq $dst,$src,$shift" %} 6712 ins_encode %{ 6713 uint vlen = Matcher::vector_length(this); 6714 if (vlen == 2) { 6715 assert(UseSSE >= 2, "required"); 6716 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6717 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6718 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6719 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6720 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6721 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6722 } else { 6723 assert(vlen == 4, "sanity"); 6724 assert(UseAVX > 1, "required"); 6725 int vlen_enc = Assembler::AVX_256bit; 6726 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6727 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), noreg); 6728 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6729 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6730 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6731 } 6732 %} 6733 ins_pipe( pipe_slow ); 6734 %} 6735 6736 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6737 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6738 match(Set dst (RShiftVL src shift)); 6739 format %{ "vshiftq $dst,$src,$shift" %} 6740 ins_encode %{ 6741 int vlen_enc = vector_length_encoding(this); 6742 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6743 %} 6744 ins_pipe( pipe_slow ); 6745 %} 6746 6747 // ------------------- Variable Shift ----------------------------- 6748 // Byte variable shift 6749 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6750 predicate(Matcher::vector_length(n) <= 8 && 6751 n->as_ShiftV()->is_var_shift() && 6752 !VM_Version::supports_avx512bw()); 6753 match(Set dst ( LShiftVB src shift)); 6754 match(Set dst ( RShiftVB src shift)); 6755 match(Set dst (URShiftVB src shift)); 
6756 effect(TEMP dst, TEMP vtmp); 6757 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6758 ins_encode %{ 6759 assert(UseAVX >= 2, "required"); 6760 6761 int opcode = this->ideal_Opcode(); 6762 int vlen_enc = Assembler::AVX_128bit; 6763 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6764 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6765 %} 6766 ins_pipe( pipe_slow ); 6767 %} 6768 6769 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6770 predicate(Matcher::vector_length(n) == 16 && 6771 n->as_ShiftV()->is_var_shift() && 6772 !VM_Version::supports_avx512bw()); 6773 match(Set dst ( LShiftVB src shift)); 6774 match(Set dst ( RShiftVB src shift)); 6775 match(Set dst (URShiftVB src shift)); 6776 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6777 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6778 ins_encode %{ 6779 assert(UseAVX >= 2, "required"); 6780 6781 int opcode = this->ideal_Opcode(); 6782 int vlen_enc = Assembler::AVX_128bit; 6783 // Shift lower half and get word result in dst 6784 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6785 6786 // Shift upper half and get word result in vtmp1 6787 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6788 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6789 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6790 6791 // Merge and down convert the two word results to byte in dst 6792 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6793 %} 6794 ins_pipe( pipe_slow ); 6795 %} 6796 6797 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4) %{ 6798 predicate(Matcher::vector_length(n) == 32 && 6799 n->as_ShiftV()->is_var_shift() && 6800 !VM_Version::supports_avx512bw()); 6801 match(Set dst ( LShiftVB src shift)); 6802 match(Set dst ( RShiftVB src shift)); 6803 match(Set dst (URShiftVB src shift)); 6804 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4); 6805 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 as TEMP" %} 6806 ins_encode %{ 6807 assert(UseAVX >= 2, "required"); 6808 6809 int opcode = this->ideal_Opcode(); 6810 int vlen_enc = Assembler::AVX_128bit; 6811 // Process lower 128 bits and get result in dst 6812 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6813 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6814 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6815 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6816 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6817 6818 // Process higher 128 bits and get result in vtmp3 6819 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6820 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6821 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister); 6822 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6823 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6824 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 
$vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6825 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6826 6827 // Merge the two results in dst 6828 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6829 %} 6830 ins_pipe( pipe_slow ); 6831 %} 6832 6833 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp) %{ 6834 predicate(Matcher::vector_length(n) <= 32 && 6835 n->as_ShiftV()->is_var_shift() && 6836 VM_Version::supports_avx512bw()); 6837 match(Set dst ( LShiftVB src shift)); 6838 match(Set dst ( RShiftVB src shift)); 6839 match(Set dst (URShiftVB src shift)); 6840 effect(TEMP dst, TEMP vtmp); 6841 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp as TEMP" %} 6842 ins_encode %{ 6843 assert(UseAVX > 2, "required"); 6844 6845 int opcode = this->ideal_Opcode(); 6846 int vlen_enc = vector_length_encoding(this); 6847 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister); 6848 %} 6849 ins_pipe( pipe_slow ); 6850 %} 6851 6852 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6853 predicate(Matcher::vector_length(n) == 64 && 6854 n->as_ShiftV()->is_var_shift() && 6855 VM_Version::supports_avx512bw()); 6856 match(Set dst ( LShiftVB src shift)); 6857 match(Set dst ( RShiftVB src shift)); 6858 match(Set dst (URShiftVB src shift)); 6859 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6860 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 as TEMP" %} 6861 ins_encode %{ 6862 assert(UseAVX > 2, "required"); 6863 6864 int opcode = this->ideal_Opcode(); 6865 int vlen_enc = Assembler::AVX_256bit; 6866 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister); 6867 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6868 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6869 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister); 6870 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6871 %} 6872 ins_pipe( pipe_slow ); 6873 %} 6874 6875 // Short variable shift 6876 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp) %{ 6877 predicate(Matcher::vector_length(n) <= 8 && 6878 n->as_ShiftV()->is_var_shift() && 6879 !VM_Version::supports_avx512bw()); 6880 match(Set dst ( LShiftVS src shift)); 6881 match(Set dst ( RShiftVS src shift)); 6882 match(Set dst (URShiftVS src shift)); 6883 effect(TEMP dst, TEMP vtmp); 6884 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6885 ins_encode %{ 6886 assert(UseAVX >= 2, "required"); 6887 6888 int opcode = this->ideal_Opcode(); 6889 bool sign = (opcode != Op_URShiftVS); 6890 int vlen_enc = Assembler::AVX_256bit; 6891 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6892 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6893 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6894 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6895 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6896 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6897 %} 6898 ins_pipe( pipe_slow ); 6899 %} 6900 6901 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2) %{ 6902 predicate(Matcher::vector_length(n) == 16 && 
6903 n->as_ShiftV()->is_var_shift() && 6904 !VM_Version::supports_avx512bw()); 6905 match(Set dst ( LShiftVS src shift)); 6906 match(Set dst ( RShiftVS src shift)); 6907 match(Set dst (URShiftVS src shift)); 6908 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 6909 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6910 ins_encode %{ 6911 assert(UseAVX >= 2, "required"); 6912 6913 int opcode = this->ideal_Opcode(); 6914 bool sign = (opcode != Op_URShiftVS); 6915 int vlen_enc = Assembler::AVX_256bit; 6916 // Shift lower half, with result in vtmp2 using vtmp1 as TEMP 6917 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6918 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6919 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6920 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6921 6922 // Shift upper half, with result in dst using vtmp1 as TEMP 6923 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6924 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6925 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6926 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6927 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6928 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 6929 6930 // Merge lower and upper half result into dst 6931 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6932 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6933 %} 6934 ins_pipe( pipe_slow ); 6935 %} 6936 6937 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6938 predicate(n->as_ShiftV()->is_var_shift() && 6939 VM_Version::supports_avx512bw()); 6940 match(Set dst ( LShiftVS src shift)); 6941 match(Set dst ( RShiftVS src shift)); 6942 match(Set dst (URShiftVS src shift)); 6943 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6944 ins_encode %{ 6945 assert(UseAVX > 2, "required"); 6946 6947 int opcode = this->ideal_Opcode(); 6948 int vlen_enc = vector_length_encoding(this); 6949 if (!VM_Version::supports_avx512vl()) { 6950 vlen_enc = Assembler::AVX_512bit; 6951 } 6952 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6953 %} 6954 ins_pipe( pipe_slow ); 6955 %} 6956 6957 //Integer variable shift 6958 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6959 predicate(n->as_ShiftV()->is_var_shift()); 6960 match(Set dst ( LShiftVI src shift)); 6961 match(Set dst ( RShiftVI src shift)); 6962 match(Set dst (URShiftVI src shift)); 6963 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6964 ins_encode %{ 6965 assert(UseAVX >= 2, "required"); 6966 6967 int opcode = this->ideal_Opcode(); 6968 int vlen_enc = vector_length_encoding(this); 6969 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6970 %} 6971 ins_pipe( pipe_slow ); 6972 %} 6973 6974 //Long variable shift 6975 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6976 predicate(n->as_ShiftV()->is_var_shift()); 6977 match(Set dst ( LShiftVL src shift)); 6978 match(Set dst (URShiftVL src shift)); 6979 format %{ "vector_varshift_long $dst,$src,$shift\t!" 
%} 6980 ins_encode %{ 6981 assert(UseAVX >= 2, "required"); 6982 6983 int opcode = this->ideal_Opcode(); 6984 int vlen_enc = vector_length_encoding(this); 6985 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6986 %} 6987 ins_pipe( pipe_slow ); 6988 %} 6989 6990 //Long variable right shift arithmetic 6991 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6992 predicate(Matcher::vector_length(n) <= 4 && 6993 n->as_ShiftV()->is_var_shift() && 6994 UseAVX == 2); 6995 match(Set dst (RShiftVL src shift)); 6996 effect(TEMP dst, TEMP vtmp); 6997 format %{ "vector_varshift_long $dst,$src,$shift\n\t! using $vtmp as TEMP" %} 6998 ins_encode %{ 6999 int opcode = this->ideal_Opcode(); 7000 int vlen_enc = vector_length_encoding(this); 7001 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 7002 $vtmp$$XMMRegister); 7003 %} 7004 ins_pipe( pipe_slow ); 7005 %} 7006 7007 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 7008 predicate(n->as_ShiftV()->is_var_shift() && 7009 UseAVX > 2); 7010 match(Set dst (RShiftVL src shift)); 7011 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 7012 ins_encode %{ 7013 int opcode = this->ideal_Opcode(); 7014 int vlen_enc = vector_length_encoding(this); 7015 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 7016 %} 7017 ins_pipe( pipe_slow ); 7018 %} 7019 7020 // --------------------------------- AND -------------------------------------- 7021 7022 instruct vand(vec dst, vec src) %{ 7023 predicate(UseAVX == 0); 7024 match(Set dst (AndV dst src)); 7025 format %{ "pand $dst,$src\t! and vectors" %} 7026 ins_encode %{ 7027 __ pand($dst$$XMMRegister, $src$$XMMRegister); 7028 %} 7029 ins_pipe( pipe_slow ); 7030 %} 7031 7032 instruct vand_reg(vec dst, vec src1, vec src2) %{ 7033 predicate(UseAVX > 0); 7034 match(Set dst (AndV src1 src2)); 7035 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 7036 ins_encode %{ 7037 int vlen_enc = vector_length_encoding(this); 7038 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7039 %} 7040 ins_pipe( pipe_slow ); 7041 %} 7042 7043 instruct vand_mem(vec dst, vec src, memory mem) %{ 7044 predicate((UseAVX > 0) && 7045 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7046 match(Set dst (AndV src (LoadVector mem))); 7047 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 7048 ins_encode %{ 7049 int vlen_enc = vector_length_encoding(this); 7050 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7051 %} 7052 ins_pipe( pipe_slow ); 7053 %} 7054 7055 // --------------------------------- OR --------------------------------------- 7056 7057 instruct vor(vec dst, vec src) %{ 7058 predicate(UseAVX == 0); 7059 match(Set dst (OrV dst src)); 7060 format %{ "por $dst,$src\t! or vectors" %} 7061 ins_encode %{ 7062 __ por($dst$$XMMRegister, $src$$XMMRegister); 7063 %} 7064 ins_pipe( pipe_slow ); 7065 %} 7066 7067 instruct vor_reg(vec dst, vec src1, vec src2) %{ 7068 predicate(UseAVX > 0); 7069 match(Set dst (OrV src1 src2)); 7070 format %{ "vpor $dst,$src1,$src2\t! 
or vectors" %} 7071 ins_encode %{ 7072 int vlen_enc = vector_length_encoding(this); 7073 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7074 %} 7075 ins_pipe( pipe_slow ); 7076 %} 7077 7078 instruct vor_mem(vec dst, vec src, memory mem) %{ 7079 predicate((UseAVX > 0) && 7080 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7081 match(Set dst (OrV src (LoadVector mem))); 7082 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 7083 ins_encode %{ 7084 int vlen_enc = vector_length_encoding(this); 7085 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7086 %} 7087 ins_pipe( pipe_slow ); 7088 %} 7089 7090 // --------------------------------- XOR -------------------------------------- 7091 7092 instruct vxor(vec dst, vec src) %{ 7093 predicate(UseAVX == 0); 7094 match(Set dst (XorV dst src)); 7095 format %{ "pxor $dst,$src\t! xor vectors" %} 7096 ins_encode %{ 7097 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 7098 %} 7099 ins_pipe( pipe_slow ); 7100 %} 7101 7102 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 7103 predicate(UseAVX > 0); 7104 match(Set dst (XorV src1 src2)); 7105 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 7106 ins_encode %{ 7107 int vlen_enc = vector_length_encoding(this); 7108 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7109 %} 7110 ins_pipe( pipe_slow ); 7111 %} 7112 7113 instruct vxor_mem(vec dst, vec src, memory mem) %{ 7114 predicate((UseAVX > 0) && 7115 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 7116 match(Set dst (XorV src (LoadVector mem))); 7117 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 7118 ins_encode %{ 7119 int vlen_enc = vector_length_encoding(this); 7120 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 7121 %} 7122 ins_pipe( pipe_slow ); 7123 %} 7124 7125 // --------------------------------- VectorCast -------------------------------------- 7126 7127 instruct vcastBtoX(vec dst, vec src) %{ 7128 match(Set dst (VectorCastB2X src)); 7129 format %{ "vector_cast_b2x $dst,$src\t!" %} 7130 ins_encode %{ 7131 assert(UseAVX > 0, "required"); 7132 7133 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7134 int vlen_enc = vector_length_encoding(this); 7135 __ vconvert_b2x(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7136 %} 7137 ins_pipe( pipe_slow ); 7138 %} 7139 7140 instruct castStoX(vec dst, vec src) %{ 7141 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7142 Matcher::vector_length(n->in(1)) <= 8 && // src 7143 Matcher::vector_element_basic_type(n) == T_BYTE); 7144 match(Set dst (VectorCastS2X src)); 7145 format %{ "vector_cast_s2x $dst,$src" %} 7146 ins_encode %{ 7147 assert(UseAVX > 0, "required"); 7148 7149 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, noreg); 7150 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 7151 %} 7152 ins_pipe( pipe_slow ); 7153 %} 7154 7155 instruct vcastStoX(vec dst, vec src, vec vtmp) %{ 7156 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 7157 Matcher::vector_length(n->in(1)) == 16 && // src 7158 Matcher::vector_element_basic_type(n) == T_BYTE); 7159 effect(TEMP dst, TEMP vtmp); 7160 match(Set dst (VectorCastS2X src)); 7161 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp as TEMP" %} 7162 ins_encode %{ 7163 assert(UseAVX > 0, "required"); 7164 7165 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 7166 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, noreg); 7167 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 7168 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 7169 %} 7170 ins_pipe( pipe_slow ); 7171 %} 7172 7173 instruct vcastStoX_evex(vec dst, vec src) %{ 7174 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 7175 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7176 match(Set dst (VectorCastS2X src)); 7177 format %{ "vector_cast_s2x $dst,$src\t!" %} 7178 ins_encode %{ 7179 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7180 int src_vlen_enc = vector_length_encoding(this, $src); 7181 int vlen_enc = vector_length_encoding(this); 7182 switch (to_elem_bt) { 7183 case T_BYTE: 7184 if (!VM_Version::supports_avx512vl()) { 7185 vlen_enc = Assembler::AVX_512bit; 7186 } 7187 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7188 break; 7189 case T_INT: 7190 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7191 break; 7192 case T_FLOAT: 7193 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7194 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7195 break; 7196 case T_LONG: 7197 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7198 break; 7199 case T_DOUBLE: { 7200 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 7201 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 7202 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7203 break; 7204 } 7205 default: 7206 ShouldNotReachHere(); 7207 } 7208 %} 7209 ins_pipe( pipe_slow ); 7210 %} 7211 7212 instruct castItoX(vec dst, vec src) %{ 7213 predicate(UseAVX <= 2 && 7214 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 7215 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7216 match(Set dst (VectorCastI2X src)); 7217 format %{ "vector_cast_i2x $dst,$src" %} 7218 ins_encode %{ 7219 assert(UseAVX > 0, "required"); 7220 7221 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7222 int vlen_enc = vector_length_encoding(this, $src); 7223 7224 if (to_elem_bt == T_BYTE) { 7225 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7226 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7227 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7228 } else { 7229 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7230 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7231 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7232 } 7233 %} 7234 ins_pipe( pipe_slow ); 7235 %} 7236 7237 instruct vcastItoX(vec dst, vec src, vec vtmp) %{ 7238 predicate(UseAVX <= 2 && 7239 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7240 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7241 match(Set dst (VectorCastI2X src)); 7242 format %{ "vector_cast_i2x $dst,$src\t! 
using $vtmp as TEMP" %} 7243 effect(TEMP dst, TEMP vtmp); 7244 ins_encode %{ 7245 assert(UseAVX > 0, "required"); 7246 7247 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7248 int vlen_enc = vector_length_encoding(this, $src); 7249 7250 if (to_elem_bt == T_BYTE) { 7251 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, noreg); 7252 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7253 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7254 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7255 } else { 7256 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7257 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, noreg); 7258 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7259 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7260 } 7261 %} 7262 ins_pipe( pipe_slow ); 7263 %} 7264 7265 instruct vcastItoX_evex(vec dst, vec src) %{ 7266 predicate(UseAVX > 2 || 7267 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7268 match(Set dst (VectorCastI2X src)); 7269 format %{ "vector_cast_i2x $dst,$src\t!" %} 7270 ins_encode %{ 7271 assert(UseAVX > 0, "required"); 7272 7273 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7274 int src_vlen_enc = vector_length_encoding(this, $src); 7275 int dst_vlen_enc = vector_length_encoding(this); 7276 switch (dst_elem_bt) { 7277 case T_BYTE: 7278 if (!VM_Version::supports_avx512vl()) { 7279 src_vlen_enc = Assembler::AVX_512bit; 7280 } 7281 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7282 break; 7283 case T_SHORT: 7284 if (!VM_Version::supports_avx512vl()) { 7285 src_vlen_enc = Assembler::AVX_512bit; 7286 } 7287 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7288 break; 7289 case T_FLOAT: 7290 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7291 break; 7292 case T_LONG: 7293 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7294 break; 7295 case T_DOUBLE: 7296 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7297 break; 7298 default: 7299 ShouldNotReachHere(); 7300 } 7301 %} 7302 ins_pipe( pipe_slow ); 7303 %} 7304 7305 instruct vcastLtoBS(vec dst, vec src) %{ 7306 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7307 UseAVX <= 2); 7308 match(Set dst (VectorCastL2X src)); 7309 format %{ "vector_cast_l2x $dst,$src" %} 7310 ins_encode %{ 7311 assert(UseAVX > 0, "required"); 7312 7313 int vlen = Matcher::vector_length_in_bytes(this, $src); 7314 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7315 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? 
ExternalAddress(vector_int_to_byte_mask()) 7316 : ExternalAddress(vector_int_to_short_mask()); 7317 if (vlen <= 16) { 7318 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7319 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7320 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7321 } else { 7322 assert(vlen <= 32, "required"); 7323 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7324 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7325 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, noreg); 7326 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7327 } 7328 if (to_elem_bt == T_BYTE) { 7329 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7330 } 7331 %} 7332 ins_pipe( pipe_slow ); 7333 %} 7334 7335 instruct vcastLtoX_evex(vec dst, vec src) %{ 7336 predicate(UseAVX > 2 || 7337 (Matcher::vector_element_basic_type(n) == T_INT || 7338 Matcher::vector_element_basic_type(n) == T_FLOAT || 7339 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7340 match(Set dst (VectorCastL2X src)); 7341 format %{ "vector_cast_l2x $dst,$src\t!" %} 7342 ins_encode %{ 7343 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7344 int vlen = Matcher::vector_length_in_bytes(this, $src); 7345 int vlen_enc = vector_length_encoding(this, $src); 7346 switch (to_elem_bt) { 7347 case T_BYTE: 7348 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7349 vlen_enc = Assembler::AVX_512bit; 7350 } 7351 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7352 break; 7353 case T_SHORT: 7354 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7355 vlen_enc = Assembler::AVX_512bit; 7356 } 7357 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7358 break; 7359 case T_INT: 7360 if (vlen == 8) { 7361 if ($dst$$XMMRegister != $src$$XMMRegister) { 7362 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7363 } 7364 } else if (vlen == 16) { 7365 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7366 } else if (vlen == 32) { 7367 if (UseAVX > 2) { 7368 if (!VM_Version::supports_avx512vl()) { 7369 vlen_enc = Assembler::AVX_512bit; 7370 } 7371 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7372 } else { 7373 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7374 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7375 } 7376 } else { // vlen == 64 7377 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7378 } 7379 break; 7380 case T_FLOAT: 7381 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7382 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7383 break; 7384 case T_DOUBLE: 7385 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7386 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7387 break; 7388 7389 default: assert(false, "%s", type2name(to_elem_bt)); 7390 } 7391 %} 7392 ins_pipe( pipe_slow ); 7393 %} 7394 7395 instruct vcastFtoD_reg(vec dst, vec src) %{ 7396 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7397 match(Set dst (VectorCastF2X src)); 7398 format %{ "vector_cast_f2d $dst,$src\t!" 
%} 7399 ins_encode %{ 7400 int vlen_enc = vector_length_encoding(this); 7401 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7402 %} 7403 ins_pipe( pipe_slow ); 7404 %} 7405 7406 7407 instruct castFtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7408 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7409 type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4); 7410 match(Set dst (VectorCastF2X src)); 7411 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7412 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3 and $xtmp4 as TEMP" %} 7413 ins_encode %{ 7414 int vlen_enc = vector_length_encoding(this, $src); 7415 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7416 // JDK-8292878 removed the need for an explicit scratch register to load addresses wider than 7417 // 32 bits for register-indirect addressing, since stub constants are part of the code cache 7418 // and ReservedCodeCacheSize is currently capped at 2G. 7419 // Targets are free to raise this limit, but a code cache larger than 2G looks unreasonable 7420 // in practical scenarios. On the flip side, with the given cap we save a temporary register 7421 // allocation, which in the limiting case can prevent spilling in blocks with high register 7422 // pressure. 7423 __ vector_castF2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7424 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7425 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7426 %} 7427 ins_pipe( pipe_slow ); 7428 %} 7429 7430 instruct castFtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7431 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7432 is_integral_type(Matcher::vector_element_basic_type(n))); 7433 match(Set dst (VectorCastF2X src)); 7434 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7435 format %{ "vector_cast_f2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7436 ins_encode %{ 7437 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7438 if (to_elem_bt == T_LONG) { 7439 int vlen_enc = vector_length_encoding(this); 7440 __ vector_castF2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7441 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7442 ExternalAddress(vector_double_signflip()), noreg, vlen_enc); 7443 } else { 7444 int vlen_enc = vector_length_encoding(this, $src); 7445 __ vector_castF2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7446 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7447 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7448 } 7449 %} 7450 ins_pipe( pipe_slow ); 7451 %} 7452 7453 instruct vcastDtoF_reg(vec dst, vec src) %{ 7454 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7455 match(Set dst (VectorCastD2X src)); 7456 format %{ "vector_cast_d2x $dst,$src\t!" 
%} 7457 ins_encode %{ 7458 int vlen_enc = vector_length_encoding(this, $src); 7459 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7460 %} 7461 ins_pipe( pipe_slow ); 7462 %} 7463 7464 instruct castDtoX_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, vec xtmp5, rFlagsReg cr) %{ 7465 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7466 is_integral_type(Matcher::vector_element_basic_type(n))); 7467 match(Set dst (VectorCastD2X src)); 7468 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP xtmp5, KILL cr); 7469 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $xtmp5 as TEMP" %} 7470 ins_encode %{ 7471 int vlen_enc = vector_length_encoding(this, $src); 7472 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7473 __ vector_castD2X_avx(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7474 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, $xtmp5$$XMMRegister, 7475 ExternalAddress(vector_float_signflip()), noreg, vlen_enc); 7476 %} 7477 ins_pipe( pipe_slow ); 7478 %} 7479 7480 instruct castDtoX_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7481 predicate((VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n->in(1)) == 64) && 7482 is_integral_type(Matcher::vector_element_basic_type(n))); 7483 match(Set dst (VectorCastD2X src)); 7484 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7485 format %{ "vector_cast_d2x $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1 and $ktmp2 as TEMP" %} 7486 ins_encode %{ 7487 int vlen_enc = vector_length_encoding(this, $src); 7488 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7489 AddressLiteral signflip = VM_Version::supports_avx512dq() ? ExternalAddress(vector_double_signflip()) : 7490 ExternalAddress(vector_float_signflip()); 7491 __ vector_castD2X_evex(to_elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7492 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, signflip, noreg, vlen_enc); 7493 %} 7494 ins_pipe( pipe_slow ); 7495 %} 7496 7497 instruct vucast(vec dst, vec src) %{ 7498 match(Set dst (VectorUCastB2X src)); 7499 match(Set dst (VectorUCastS2X src)); 7500 match(Set dst (VectorUCastI2X src)); 7501 format %{ "vector_ucast $dst,$src\t!" %} 7502 ins_encode %{ 7503 assert(UseAVX > 0, "required"); 7504 7505 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7506 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7507 int vlen_enc = vector_length_encoding(this); 7508 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7509 %} 7510 ins_pipe( pipe_slow ); 7511 %} 7512 7513 #ifdef _LP64 7514 instruct vround_float_avx(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rFlagsReg cr) %{ 7515 predicate(!VM_Version::supports_avx512vl() && 7516 Matcher::vector_length_in_bytes(n) < 64 && 7517 Matcher::vector_element_basic_type(n) == T_INT); 7518 match(Set dst (RoundVF src)); 7519 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, KILL cr); 7520 format %{ "vector_round_float $dst,$src\t! 
using $tmp, $xtmp1, $xtmp2, $xtmp3, $xtmp4 as TEMP" %} 7521 ins_encode %{ 7522 int vlen_enc = vector_length_encoding(this); 7523 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7524 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, 7525 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7526 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister); 7527 %} 7528 ins_pipe( pipe_slow ); 7529 %} 7530 7531 instruct vround_float_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7532 predicate((VM_Version::supports_avx512vl() || 7533 Matcher::vector_length_in_bytes(n) == 64) && 7534 Matcher::vector_element_basic_type(n) == T_INT); 7535 match(Set dst (RoundVF src)); 7536 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7537 format %{ "vector_round_float $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7538 ins_encode %{ 7539 int vlen_enc = vector_length_encoding(this); 7540 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7541 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, 7542 ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), new_mxcsr, vlen_enc, 7543 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7544 %} 7545 ins_pipe( pipe_slow ); 7546 %} 7547 7548 instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 7549 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7550 match(Set dst (RoundVD src)); 7551 effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr); 7552 format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %} 7553 ins_encode %{ 7554 int vlen_enc = vector_length_encoding(this); 7555 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7556 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, 7557 ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc, 7558 $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister); 7559 %} 7560 ins_pipe( pipe_slow ); 7561 %} 7562 7563 #endif // _LP64 7564 7565 // --------------------------------- VectorMaskCmp -------------------------------------- 7566 7567 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7568 predicate(n->bottom_type()->isa_vectmask() == NULL && 7569 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7570 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7571 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7572 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7573 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
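// A small sketch (illustrative only) decoding the new_mxcsr constant 0x3F80 loaded by the
// RoundVF/RoundVD instructs above, assuming the architectural MXCSR bit layout: all FP
// exceptions are masked and the rounding-control field selects round-toward-negative-
// infinity, which is consistent with an add-0.5-then-floor style implementation of
// Math.round semantics inside the macroassembler routines.
#include <cstdio>
int main() {
  const unsigned mxcsr = 0x3F80;
  unsigned masks = (mxcsr >> 7) & 0x3F;  // bits 7..12: IM, DM, ZM, OM, UM, PM
  unsigned rc    = (mxcsr >> 13) & 0x3;  // bits 13..14: rounding control
  std::printf("exception masks=0x%x rc=%u\n", masks, rc); // prints: exception masks=0x3f rc=1
  return 0;                              // rc == 1 means round toward -infinity
}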
%} 7574 ins_encode %{ 7575 int vlen_enc = vector_length_encoding(this, $src1); 7576 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7577 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7578 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7579 } else { 7580 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7581 } 7582 %} 7583 ins_pipe( pipe_slow ); 7584 %} 7585 7586 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7587 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7588 n->bottom_type()->isa_vectmask() == NULL && 7589 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7590 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7591 effect(TEMP ktmp); 7592 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7593 ins_encode %{ 7594 int vlen_enc = Assembler::AVX_512bit; 7595 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7596 KRegister mask = k0; // The comparison itself is not being masked. 7597 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7598 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7599 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7600 } else { 7601 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7602 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg); 7603 } 7604 %} 7605 ins_pipe( pipe_slow ); 7606 %} 7607 7608 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7609 predicate(n->bottom_type()->isa_vectmask() && 7610 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7611 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7612 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7613 ins_encode %{ 7614 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7615 int vlen_enc = vector_length_encoding(this, $src1); 7616 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7617 KRegister mask = k0; // The comparison itself is not being masked. 7618 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7619 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7620 } else { 7621 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7622 } 7623 %} 7624 ins_pipe( pipe_slow ); 7625 %} 7626 7627 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7628 predicate(n->bottom_type()->isa_vectmask() == NULL && 7629 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7630 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7631 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7632 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7633 (n->in(2)->get_int() == BoolTest::eq || 7634 n->in(2)->get_int() == BoolTest::lt || 7635 n->in(2)->get_int() == BoolTest::gt)); // cond 7636 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7637 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
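// Illustrative scalar model of the compare-and-materialize pattern used by evcmpFD64
// above: the EVEX compare writes one bit per lane into a k register, and the masked move
// from the vector_all_bits_set() constant turns each bit into an all-ones (-1) or
// all-zero destination lane. Sketch for one example predicate (lt); not the emitted code.
#include <cstdint>
static inline void cmp_lt_to_vector_mask(const float* a, const float* b, int32_t* dst, int lanes) {
  for (int i = 0; i < lanes; i++) {
    dst[i] = (a[i] < b[i]) ? -1 : 0;   // -1 == all bits set in the lane
  }
}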
%} 7638 ins_encode %{ 7639 int vlen_enc = vector_length_encoding(this, $src1); 7640 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7641 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7642 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc); 7643 %} 7644 ins_pipe( pipe_slow ); 7645 %} 7646 7647 instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7648 predicate(n->bottom_type()->isa_vectmask() == NULL && 7649 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7650 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7651 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7652 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7653 (n->in(2)->get_int() == BoolTest::ne || 7654 n->in(2)->get_int() == BoolTest::le || 7655 n->in(2)->get_int() == BoolTest::ge)); // cond 7656 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7657 effect(TEMP dst, TEMP xtmp); 7658 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %} 7659 ins_encode %{ 7660 int vlen_enc = vector_length_encoding(this, $src1); 7661 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7662 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7663 __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7664 %} 7665 ins_pipe( pipe_slow ); 7666 %} 7667 7668 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{ 7669 predicate(n->bottom_type()->isa_vectmask() == NULL && 7670 is_unsigned_booltest_pred(n->in(2)->get_int()) && 7671 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7672 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7673 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7674 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7675 effect(TEMP dst, TEMP xtmp); 7676 format %{ "vector_compareu $dst,$src1,$src2,$cond\t! 
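// Illustrative scalar identities behind the vcmp_negate variant above: packed-integer
// SSE/AVX only provide EQ and GT compares directly (LT is GT with the operands swapped),
// so ne/le/ge are generated as the complementary predicate followed by an inversion,
// which is presumably what the extra $xtmp temp is used for. A sketch of the logic only:
static inline bool cmp_ne(int a, int b) { return !(a == b); } // ne = not(eq)
static inline bool cmp_le(int a, int b) { return !(a > b);  } // le = not(gt)
static inline bool cmp_ge(int a, int b) { return !(a < b);  } // ge = not(lt)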
using $xtmp as TEMP" %} 7677 ins_encode %{ 7678 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7679 int vlen_enc = vector_length_encoding(this, $src1); 7680 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7681 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7682 7683 if (vlen_enc == Assembler::AVX_128bit) { 7684 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7685 } else { 7686 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7687 } 7688 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7689 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7690 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7691 %} 7692 ins_pipe( pipe_slow ); 7693 %} 7694 7695 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{ 7696 predicate((n->bottom_type()->isa_vectmask() == NULL && 7697 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7698 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7699 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7700 effect(TEMP ktmp); 7701 format %{ "vector_compare $dst,$src1,$src2,$cond" %} 7702 ins_encode %{ 7703 assert(UseAVX > 2, "required"); 7704 7705 int vlen_enc = vector_length_encoding(this, $src1); 7706 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7707 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 7708 KRegister mask = k0; // The comparison itself is not being masked. 7709 bool merge = false; 7710 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7711 7712 switch (src1_elem_bt) { 7713 case T_INT: { 7714 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7715 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7716 break; 7717 } 7718 case T_LONG: { 7719 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7720 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg); 7721 break; 7722 } 7723 default: assert(false, "%s", type2name(src1_elem_bt)); 7724 } 7725 %} 7726 ins_pipe( pipe_slow ); 7727 %} 7728 7729 7730 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7731 predicate(n->bottom_type()->isa_vectmask() && 7732 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7733 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7734 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
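// Illustrative scalar model of the sign-flip trick used by vcmpu above: XOR-ing both
// operands with the sign bit (the broadcast flip_bit constant) biases them so that a
// signed compare yields the unsigned ordering. Sketch for 32-bit lanes, names are ours.
#include <cstdint>
static inline bool unsigned_lt_via_signed(uint32_t a, uint32_t b) {
  int32_t sa = (int32_t)(a ^ 0x80000000u);  // flip sign bit of a
  int32_t sb = (int32_t)(b ^ 0x80000000u);  // flip sign bit of b
  return sa < sb;                           // equals (a < b) in unsigned arithmetic
}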
" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison result is written directly into the destination mask register.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
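// Illustrative index arithmetic behind the two-step extract used above for 256/512-bit
// vectors. The helper name and the exact split are ours; get_lane/get_elem in the
// macroassembler may divide the work differently, but the idea is: isolate the 128-bit
// lane that holds the element, then pick the element inside that lane.
static inline void split_extract_index(int idx, int elem_size_in_bytes,
                                       int* lane, int* idx_in_lane) {
  int elems_per_lane = 16 / elem_size_in_bytes;  // a lane is 128 bits == 16 bytes
  *lane        = idx / elems_per_lane;           // which lane to copy into the temp first
  *idx_in_lane = idx % elems_per_lane;           // element position inside that lane
}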
using $vtmp as TEMP" %} 7827 ins_encode %{ 7828 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7829 7830 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7831 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7832 %} 7833 ins_pipe( pipe_slow ); 7834 %} 7835 #endif 7836 7837 instruct extractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7838 predicate(Matcher::vector_length(n->in(1)) <= 4); 7839 match(Set dst (ExtractF src idx)); 7840 effect(TEMP dst, TEMP vtmp); 7841 format %{ "extractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7842 ins_encode %{ 7843 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7844 7845 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $vtmp$$XMMRegister); 7846 %} 7847 ins_pipe( pipe_slow ); 7848 %} 7849 7850 instruct vextractF(legRegF dst, legVec src, immU8 idx, legVec vtmp) %{ 7851 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7852 Matcher::vector_length(n->in(1)/*src*/) == 16); 7853 match(Set dst (ExtractF src idx)); 7854 effect(TEMP vtmp); 7855 format %{ "vextractF $dst,$src,$idx\t! using $vtmp as TEMP" %} 7856 ins_encode %{ 7857 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7858 7859 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7860 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant); 7861 %} 7862 ins_pipe( pipe_slow ); 7863 %} 7864 7865 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7866 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7867 match(Set dst (ExtractD src idx)); 7868 format %{ "extractD $dst,$src,$idx\t!" %} 7869 ins_encode %{ 7870 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7871 7872 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7873 %} 7874 ins_pipe( pipe_slow ); 7875 %} 7876 7877 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7878 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7879 Matcher::vector_length(n->in(1)) == 8); // src 7880 match(Set dst (ExtractD src idx)); 7881 effect(TEMP vtmp); 7882 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7883 ins_encode %{ 7884 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7885 7886 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7887 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7888 %} 7889 ins_pipe( pipe_slow ); 7890 %} 7891 7892 // --------------------------------- Vector Blend -------------------------------------- 7893 7894 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7895 predicate(UseAVX == 0); 7896 match(Set dst (VectorBlend (Binary dst src) mask)); 7897 format %{ "vector_blend $dst,$src,$mask\t! 
using $tmp as TEMP" %} 7898 effect(TEMP tmp); 7899 ins_encode %{ 7900 assert(UseSSE >= 4, "required"); 7901 7902 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7903 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7904 } 7905 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7906 %} 7907 ins_pipe( pipe_slow ); 7908 %} 7909 7910 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7911 predicate(UseAVX > 0 && 7912 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7913 Matcher::vector_length_in_bytes(n) <= 32 && 7914 is_integral_type(Matcher::vector_element_basic_type(n))); 7915 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7916 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7917 ins_encode %{ 7918 int vlen_enc = vector_length_encoding(this); 7919 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7920 %} 7921 ins_pipe( pipe_slow ); 7922 %} 7923 7924 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7925 predicate(UseAVX > 0 && 7926 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7927 Matcher::vector_length_in_bytes(n) <= 32 && 7928 !is_integral_type(Matcher::vector_element_basic_type(n))); 7929 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7930 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7931 ins_encode %{ 7932 int vlen_enc = vector_length_encoding(this); 7933 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7934 %} 7935 ins_pipe( pipe_slow ); 7936 %} 7937 7938 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{ 7939 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7940 n->in(2)->bottom_type()->isa_vectmask() == NULL); 7941 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7942 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %} 7943 effect(TEMP ktmp); 7944 ins_encode %{ 7945 int vlen_enc = Assembler::AVX_512bit; 7946 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7947 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg); 7948 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7949 %} 7950 ins_pipe( pipe_slow ); 7951 %} 7952 7953 7954 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{ 7955 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7956 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7957 VM_Version::supports_avx512bw())); 7958 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7959 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
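// Illustrative scalar model of VectorBlend as matched above: every mask lane is all-ones
// or all-zero (typically produced by VectorMaskCmp), and the blend selects src2 where the
// mask is set and src1 elsewhere. The EVEX form first compares the mask vector against the
// all-bits-set constant to obtain a k register, then blends under that k mask.
#include <cstdint>
static inline void blend(const int32_t* src1, const int32_t* src2, const int32_t* mask,
                         int32_t* dst, int lanes) {
  for (int i = 0; i < lanes; i++) {
    dst[i] = (mask[i] != 0) ? src2[i] : src1[i];  // -1 selects src2, 0 selects src1
  }
}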
using k2 as TEMP" %} 7960 ins_encode %{ 7961 int vlen_enc = vector_length_encoding(this); 7962 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7963 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7964 %} 7965 ins_pipe( pipe_slow ); 7966 %} 7967 7968 // --------------------------------- ABS -------------------------------------- 7969 // a = |a| 7970 instruct vabsB_reg(vec dst, vec src) %{ 7971 match(Set dst (AbsVB src)); 7972 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7973 ins_encode %{ 7974 uint vlen = Matcher::vector_length(this); 7975 if (vlen <= 16) { 7976 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7977 } else { 7978 int vlen_enc = vector_length_encoding(this); 7979 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7980 } 7981 %} 7982 ins_pipe( pipe_slow ); 7983 %} 7984 7985 instruct vabsS_reg(vec dst, vec src) %{ 7986 match(Set dst (AbsVS src)); 7987 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7988 ins_encode %{ 7989 uint vlen = Matcher::vector_length(this); 7990 if (vlen <= 8) { 7991 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7992 } else { 7993 int vlen_enc = vector_length_encoding(this); 7994 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7995 } 7996 %} 7997 ins_pipe( pipe_slow ); 7998 %} 7999 8000 instruct vabsI_reg(vec dst, vec src) %{ 8001 match(Set dst (AbsVI src)); 8002 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 8003 ins_encode %{ 8004 uint vlen = Matcher::vector_length(this); 8005 if (vlen <= 4) { 8006 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8007 } else { 8008 int vlen_enc = vector_length_encoding(this); 8009 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8010 } 8011 %} 8012 ins_pipe( pipe_slow ); 8013 %} 8014 8015 instruct vabsL_reg(vec dst, vec src) %{ 8016 match(Set dst (AbsVL src)); 8017 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 8018 ins_encode %{ 8019 assert(UseAVX > 2, "required"); 8020 int vlen_enc = vector_length_encoding(this); 8021 if (!VM_Version::supports_avx512vl()) { 8022 vlen_enc = Assembler::AVX_512bit; 8023 } 8024 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8025 %} 8026 ins_pipe( pipe_slow ); 8027 %} 8028 8029 // --------------------------------- ABSNEG -------------------------------------- 8030 8031 instruct vabsnegF(vec dst, vec src) %{ 8032 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 8033 match(Set dst (AbsVF src)); 8034 match(Set dst (NegVF src)); 8035 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 8036 ins_cost(150); 8037 ins_encode %{ 8038 int opcode = this->ideal_Opcode(); 8039 int vlen = Matcher::vector_length(this); 8040 if (vlen == 2) { 8041 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8042 } else { 8043 assert(vlen == 8 || vlen == 16, "required"); 8044 int vlen_enc = vector_length_encoding(this); 8045 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8046 } 8047 %} 8048 ins_pipe( pipe_slow ); 8049 %} 8050 8051 instruct vabsneg4F(vec dst) %{ 8052 predicate(Matcher::vector_length(n) == 4); 8053 match(Set dst (AbsVF dst)); 8054 match(Set dst (NegVF dst)); 8055 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %} 8056 ins_cost(150); 8057 ins_encode %{ 8058 int opcode = this->ideal_Opcode(); 8059 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister); 8060 %} 8061 ins_pipe( pipe_slow ); 8062 %} 8063 8064 instruct vabsnegD(vec dst, vec src) %{ 8065 
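// Illustrative scalar model of the [mask] constant the AbsV*/NegV* instructions in this
// section refer to: for floating-point lanes, abs clears the sign bit and neg flips it,
// so each is a single bitwise operation against a sign-bit mask. Helper names are ours.
#include <cstdint>
#include <cstring>
static inline float abs_via_mask(float f) {
  uint32_t bits; std::memcpy(&bits, &f, sizeof bits);
  bits &= 0x7FFFFFFFu;                           // clear sign bit -> |f|
  std::memcpy(&f, &bits, sizeof f); return f;
}
static inline float neg_via_mask(float f) {
  uint32_t bits; std::memcpy(&bits, &f, sizeof bits);
  bits ^= 0x80000000u;                           // flip sign bit -> -f
  std::memcpy(&f, &bits, sizeof f); return f;
}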
match(Set dst (AbsVD src)); 8066 match(Set dst (NegVD src)); 8067 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 8068 ins_encode %{ 8069 int opcode = this->ideal_Opcode(); 8070 uint vlen = Matcher::vector_length(this); 8071 if (vlen == 2) { 8072 assert(UseSSE >= 2, "required"); 8073 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister); 8074 } else { 8075 int vlen_enc = vector_length_encoding(this); 8076 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8077 } 8078 %} 8079 ins_pipe( pipe_slow ); 8080 %} 8081 8082 //------------------------------------- VectorTest -------------------------------------------- 8083 8084 #ifdef _LP64 8085 instruct vptest_lt16(rFlagsRegU cr, legVec src1, legVec src2, legVec vtmp) %{ 8086 predicate(Matcher::vector_length_in_bytes(n->in(1)) < 16); 8087 match(Set cr (VectorTest src1 src2)); 8088 effect(TEMP vtmp); 8089 format %{ "vptest_lt16 $src1, $src2\t! using $vtmp as TEMP" %} 8090 ins_encode %{ 8091 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8092 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8093 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister, vlen); 8094 %} 8095 ins_pipe( pipe_slow ); 8096 %} 8097 8098 instruct vptest_ge16(rFlagsRegU cr, legVec src1, legVec src2) %{ 8099 predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16); 8100 match(Set cr (VectorTest src1 src2)); 8101 format %{ "vptest_ge16 $src1, $src2\n\t" %} 8102 ins_encode %{ 8103 BasicType bt = Matcher::vector_element_basic_type(this, $src1); 8104 int vlen = Matcher::vector_length_in_bytes(this, $src1); 8105 __ vectortest(bt, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, vlen); 8106 %} 8107 ins_pipe( pipe_slow ); 8108 %} 8109 8110 instruct ktest_alltrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8111 predicate((Matcher::vector_length(n->in(1)) < 8 || 8112 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8113 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 8114 match(Set cr (VectorTest src1 src2)); 8115 effect(TEMP tmp); 8116 format %{ "ktest_alltrue_le8 $src1, $src2\t! using $tmp as TEMP" %} 8117 ins_encode %{ 8118 uint masklen = Matcher::vector_length(this, $src1); 8119 __ kmovwl($tmp$$Register, $src1$$KRegister); 8120 __ andl($tmp$$Register, (1 << masklen) - 1); 8121 __ cmpl($tmp$$Register, (1 << masklen) - 1); 8122 %} 8123 ins_pipe( pipe_slow ); 8124 %} 8125 8126 instruct ktest_anytrue_le8(rFlagsRegU cr, kReg src1, kReg src2, rRegI tmp) %{ 8127 predicate((Matcher::vector_length(n->in(1)) < 8 || 8128 (Matcher::vector_length(n->in(1)) == 8 && !VM_Version::supports_avx512dq())) && 8129 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 8130 match(Set cr (VectorTest src1 src2)); 8131 effect(TEMP tmp); 8132 format %{ "ktest_anytrue_le8 $src1, $src2\t! 
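// Illustrative scalar model of the two short-mask ktest flavours above: only the low
// masklen bits of the k register carry lane information, so alltrue masks and compares
// against (1 << masklen) - 1 (the cmpl sets the flags the user branches on), while
// anytrue only needs the zero flag produced by the andl.
static inline bool mask_alltrue(unsigned kbits, unsigned masklen) {
  unsigned relevant = (1u << masklen) - 1;
  return (kbits & relevant) == relevant;          // every lane bit set
}
static inline bool mask_anytrue(unsigned kbits, unsigned masklen) {
  return (kbits & ((1u << masklen) - 1)) != 0;    // at least one lane bit set
}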
using $tmp as TEMP" %} 8133 ins_encode %{ 8134 uint masklen = Matcher::vector_length(this, $src1); 8135 __ kmovwl($tmp$$Register, $src1$$KRegister); 8136 __ andl($tmp$$Register, (1 << masklen) - 1); 8137 %} 8138 ins_pipe( pipe_slow ); 8139 %} 8140 8141 instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{ 8142 predicate(Matcher::vector_length(n->in(1)) >= 16 || 8143 (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq())); 8144 match(Set cr (VectorTest src1 src2)); 8145 format %{ "ktest_ge8 $src1, $src2\n\t" %} 8146 ins_encode %{ 8147 uint masklen = Matcher::vector_length(this, $src1); 8148 __ kortest(masklen, $src1$$KRegister, $src1$$KRegister); 8149 %} 8150 ins_pipe( pipe_slow ); 8151 %} 8152 #endif 8153 8154 //------------------------------------- LoadMask -------------------------------------------- 8155 8156 instruct loadMask(legVec dst, legVec src) %{ 8157 predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw()); 8158 match(Set dst (VectorLoadMask src)); 8159 effect(TEMP dst); 8160 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8161 ins_encode %{ 8162 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8163 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8164 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8165 %} 8166 ins_pipe( pipe_slow ); 8167 %} 8168 8169 instruct loadMask64(kReg dst, vec src, vec xtmp) %{ 8170 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8171 match(Set dst (VectorLoadMask src)); 8172 effect(TEMP xtmp); 8173 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %} 8174 ins_encode %{ 8175 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8176 true, Assembler::AVX_512bit); 8177 %} 8178 ins_pipe( pipe_slow ); 8179 %} 8180 8181 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8182 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8183 match(Set dst (VectorLoadMask src)); 8184 effect(TEMP xtmp); 8185 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8186 ins_encode %{ 8187 int vlen_enc = vector_length_encoding(in(1)); 8188 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8189 false, vlen_enc); 8190 %} 8191 ins_pipe( pipe_slow ); 8192 %} 8193 8194 //------------------------------------- StoreMask -------------------------------------------- 8195 8196 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8197 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8198 match(Set dst (VectorStoreMask src size)); 8199 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8200 ins_encode %{ 8201 int vlen = Matcher::vector_length(this); 8202 if (vlen <= 16 && UseAVX <= 2) { 8203 assert(UseSSE >= 3, "required"); 8204 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8205 } else { 8206 assert(UseAVX > 0, "required"); 8207 int src_vlen_enc = vector_length_encoding(this, $src); 8208 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8209 } 8210 %} 8211 ins_pipe( pipe_slow ); 8212 %} 8213 8214 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8215 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8216 match(Set dst (VectorStoreMask src size)); 8217 effect(TEMP_DEF dst, TEMP xtmp); 8218 format %{ "vector_store_mask $dst, $src \t! 
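// Illustrative scalar model of the VectorStoreMask family that follows: each -1/0 mask
// lane of $size bytes is narrowed to a single boolean byte, 0x01 or 0x00; the trailing
// pabsb/vpabsb is what turns the packed 0xFF bytes into 0x01. Shown here for the
// 2-byte-lane case; names are ours.
#include <cstdint>
static inline void store_mask_2b(const int16_t* mask_lanes, int8_t* out, int lanes) {
  for (int i = 0; i < lanes; i++) {
    out[i] = (mask_lanes[i] != 0) ? 1 : 0;  // one boolean byte per mask lane
  }
}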
elem size is $size byte[s]" %} 8219 ins_encode %{ 8220 int vlen_enc = Assembler::AVX_128bit; 8221 int vlen = Matcher::vector_length(this); 8222 if (vlen <= 8) { 8223 assert(UseSSE >= 3, "required"); 8224 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8225 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8226 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8227 } else { 8228 assert(UseAVX > 0, "required"); 8229 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8230 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8231 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8232 } 8233 %} 8234 ins_pipe( pipe_slow ); 8235 %} 8236 8237 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8238 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8239 match(Set dst (VectorStoreMask src size)); 8240 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8241 effect(TEMP_DEF dst, TEMP xtmp); 8242 ins_encode %{ 8243 int vlen_enc = Assembler::AVX_128bit; 8244 int vlen = Matcher::vector_length(this); 8245 if (vlen <= 4) { 8246 assert(UseSSE >= 3, "required"); 8247 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8248 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8249 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8250 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8251 } else { 8252 assert(UseAVX > 0, "required"); 8253 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8254 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8255 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8256 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8257 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8258 } 8259 %} 8260 ins_pipe( pipe_slow ); 8261 %} 8262 8263 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8264 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8265 match(Set dst (VectorStoreMask src size)); 8266 effect(TEMP_DEF dst, TEMP xtmp); 8267 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8268 ins_encode %{ 8269 assert(UseSSE >= 3, "required"); 8270 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8271 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8272 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8273 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8274 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8275 %} 8276 ins_pipe( pipe_slow ); 8277 %} 8278 8279 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8280 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8281 match(Set dst (VectorStoreMask src size)); 8282 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} 8283 effect(TEMP_DEF dst, TEMP vtmp); 8284 ins_encode %{ 8285 int vlen_enc = Assembler::AVX_128bit; 8286 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8287 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8288 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8289 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8290 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8291 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8292 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8293 %} 8294 ins_pipe( pipe_slow ); 8295 %} 8296 8297 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8298 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8299 match(Set dst (VectorStoreMask src size)); 8300 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8301 ins_encode %{ 8302 int src_vlen_enc = vector_length_encoding(this, $src); 8303 int dst_vlen_enc = vector_length_encoding(this); 8304 if (!VM_Version::supports_avx512vl()) { 8305 src_vlen_enc = Assembler::AVX_512bit; 8306 } 8307 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8308 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8309 %} 8310 ins_pipe( pipe_slow ); 8311 %} 8312 8313 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8314 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8315 match(Set dst (VectorStoreMask src size)); 8316 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8317 ins_encode %{ 8318 int src_vlen_enc = vector_length_encoding(this, $src); 8319 int dst_vlen_enc = vector_length_encoding(this); 8320 if (!VM_Version::supports_avx512vl()) { 8321 src_vlen_enc = Assembler::AVX_512bit; 8322 } 8323 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8324 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8325 %} 8326 ins_pipe( pipe_slow ); 8327 %} 8328 8329 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{ 8330 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8331 match(Set dst (VectorStoreMask mask size)); 8332 effect(TEMP_DEF dst); 8333 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8334 ins_encode %{ 8335 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8336 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8337 false, Assembler::AVX_512bit, noreg); 8338 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8339 %} 8340 ins_pipe( pipe_slow ); 8341 %} 8342 8343 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8344 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8345 match(Set dst (VectorStoreMask mask size)); 8346 effect(TEMP_DEF dst); 8347 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8348 ins_encode %{ 8349 int dst_vlen_enc = vector_length_encoding(this); 8350 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8351 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8352 %} 8353 ins_pipe( pipe_slow ); 8354 %} 8355 8356 instruct vmaskcast_evex(kReg dst) %{ 8357 match(Set dst (VectorMaskCast dst)); 8358 ins_cost(0); 8359 format %{ "vector_mask_cast $dst" %} 8360 ins_encode %{ 8361 // empty 8362 %} 8363 ins_pipe(empty); 8364 %} 8365 8366 instruct vmaskcast(vec dst) %{ 8367 predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); 8368 match(Set dst (VectorMaskCast dst)); 8369 ins_cost(0); 8370 format %{ "vector_mask_cast $dst" %} 8371 ins_encode %{ 8372 // empty 8373 %} 8374 ins_pipe(empty); 8375 %} 8376 8377 instruct vmaskcast_avx(vec dst, vec src) %{ 8378 predicate(Matcher::vector_length_in_bytes(n) != Matcher::vector_length_in_bytes(n->in(1))); 8379 match(Set dst (VectorMaskCast src)); 8380 format %{ "vector_mask_cast $dst, $src" %} 8381 ins_encode %{ 8382 int vlen = Matcher::vector_length(this); 8383 BasicType src_bt = Matcher::vector_element_basic_type(this, $src); 8384 BasicType dst_bt = Matcher::vector_element_basic_type(this); 8385 __ vector_mask_cast($dst$$XMMRegister, $src$$XMMRegister, dst_bt, src_bt, vlen); 8386 %} 8387 ins_pipe(pipe_slow); 8388 %} 8389 8390 //-------------------------------- Load Iota Indices ---------------------------------- 8391 8392 instruct loadIotaIndices(vec dst, immI_0 src) %{ 8393 match(Set dst (VectorLoadConst src)); 8394 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8395 ins_encode %{ 8396 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8397 BasicType bt = Matcher::vector_element_basic_type(this); 8398 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, bt); 8399 %} 8400 ins_pipe( pipe_slow ); 8401 %} 8402 8403 #ifdef _LP64 8404 instruct VectorPopulateIndex(vec dst, rRegI src1, immI_1 src2, vec vtmp) %{ 8405 match(Set dst (PopulateIndex src1 src2)); 8406 effect(TEMP dst, TEMP vtmp); 8407 format %{ "vector_populate_index $dst $src1 $src2\t! using $vtmp as TEMP" %} 8408 ins_encode %{ 8409 assert($src2$$constant == 1, "required"); 8410 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8411 int vlen_enc = vector_length_encoding(this); 8412 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8413 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8414 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8415 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8416 %} 8417 ins_pipe( pipe_slow ); 8418 %} 8419 8420 instruct VectorPopulateLIndex(vec dst, rRegL src1, immI_1 src2, vec vtmp) %{ 8421 match(Set dst (PopulateIndex src1 src2)); 8422 effect(TEMP dst, TEMP vtmp); 8423 format %{ "vector_populate_index $dst $src1 $src2\t! 
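// Illustrative scalar model of PopulateIndex as implemented above: broadcast the starting
// index, load the iota constant {0, 1, 2, ...} produced by load_iota_indices, and add the
// two lane-wise. The stride operand is asserted to be 1, so the result is simply base + i.
static inline void populate_index(int start, int* dst, int lanes) {
  for (int i = 0; i < lanes; i++) {
    dst[i] = start + i;   // broadcast(start) + iota[i]
  }
}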
using $vtmp as TEMP" %} 8424 ins_encode %{ 8425 assert($src2$$constant == 1, "required"); 8426 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8427 int vlen_enc = vector_length_encoding(this); 8428 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8429 __ vpbroadcast(elem_bt, $vtmp$$XMMRegister, $src1$$Register, vlen_enc); 8430 __ load_iota_indices($dst$$XMMRegister, vlen_in_bytes, elem_bt); 8431 __ vpadd(elem_bt, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8432 %} 8433 ins_pipe( pipe_slow ); 8434 %} 8435 #endif 8436 //-------------------------------- Rearrange ---------------------------------- 8437 8438 // LoadShuffle/Rearrange for Byte 8439 8440 instruct loadShuffleB(vec dst) %{ 8441 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8442 match(Set dst (VectorLoadShuffle dst)); 8443 format %{ "vector_load_shuffle $dst, $dst" %} 8444 ins_encode %{ 8445 // empty 8446 %} 8447 ins_pipe( pipe_slow ); 8448 %} 8449 8450 instruct rearrangeB(vec dst, vec shuffle) %{ 8451 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8452 Matcher::vector_length(n) < 32); 8453 match(Set dst (VectorRearrange dst shuffle)); 8454 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8455 ins_encode %{ 8456 assert(UseSSE >= 4, "required"); 8457 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8458 %} 8459 ins_pipe( pipe_slow ); 8460 %} 8461 8462 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8463 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8464 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8465 match(Set dst (VectorRearrange src shuffle)); 8466 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8467 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2 as TEMP" %} 8468 ins_encode %{ 8469 assert(UseAVX >= 2, "required"); 8470 // Swap src into vtmp1 8471 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8472 // Shuffle swapped src to get entries from other 128 bit lane 8473 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8474 // Shuffle original src to get entries from self 128 bit lane 8475 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8476 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8477 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8478 // Perform the blend 8479 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8480 %} 8481 ins_pipe( pipe_slow ); 8482 %} 8483 8484 8485 instruct rearrangeB_evex(vec dst, vec src, vec shuffle, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegI rtmp) %{ 8486 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8487 Matcher::vector_length(n) > 32 && !VM_Version::supports_avx512_vbmi()); 8488 match(Set dst (VectorRearrange src shuffle)); 8489 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 8490 format %{ "vector_rearrange $dst, $shuffle, $src!\t using $xtmp1, $xtmp2, $xtmp3, $rtmp and $ktmp as TEMP" %} 8491 ins_encode %{ 8492 int vlen_enc = vector_length_encoding(this); 8493 __ rearrange_bytes($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, 8494 $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, 8495 $rtmp$$Register, $ktmp$$KRegister, vlen_enc); 8496 %} 8497 ins_pipe( pipe_slow ); 8498 %} 8499 8500 instruct rearrangeB_evex_vbmi(vec dst, vec src, vec shuffle) %{ 8501 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8502 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8503 match(Set dst (VectorRearrange src shuffle)); 8504 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8505 ins_encode %{ 8506 int vlen_enc = vector_length_encoding(this); 8507 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8508 %} 8509 ins_pipe( pipe_slow ); 8510 %} 8511 8512 // LoadShuffle/Rearrange for Short 8513 8514 instruct loadShuffleS(vec dst, vec src, vec vtmp) %{ 8515 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8516 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8517 match(Set dst (VectorLoadShuffle src)); 8518 effect(TEMP dst, TEMP vtmp); 8519 format %{ "vector_load_shuffle $dst, $src\t! 
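// Reference semantics (illustrative only) of the 256-bit byte VectorRearrange above.
// AVX2 vpshufb only shuffles within each 128-bit lane, so the emitted sequence shuffles
// both the original and the lane-swapped source and then blends, picking whichever copy
// holds the requested byte for each position. The AVX512-VBMI form below does the same
// thing with a single vpermb.
#include <cstdint>
static inline void rearrange_bytes_ref(const uint8_t src[32], const uint8_t shuffle[32],
                                       uint8_t dst[32]) {
  for (int i = 0; i < 32; i++) {
    dst[i] = src[shuffle[i] & 31];  // full cross-lane byte gather
  }
}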
using $vtmp as TEMP" %} 8520 ins_encode %{ 8521 // Create a byte shuffle mask from short shuffle mask 8522 // only byte shuffle instruction available on these platforms 8523 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8524 if (UseAVX == 0) { 8525 assert(vlen_in_bytes <= 16, "required"); 8526 // Multiply each shuffle by two to get byte index 8527 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8528 __ psllw($vtmp$$XMMRegister, 1); 8529 8530 // Duplicate to create 2 copies of byte index 8531 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8532 __ psllw($dst$$XMMRegister, 8); 8533 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8534 8535 // Add one to get alternate byte index 8536 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), noreg); 8537 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8538 } else { 8539 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8540 int vlen_enc = vector_length_encoding(this); 8541 // Multiply each shuffle by two to get byte index 8542 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8543 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8544 8545 // Duplicate to create 2 copies of byte index 8546 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8547 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8548 8549 // Add one to get alternate byte index 8550 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, noreg); 8551 } 8552 %} 8553 ins_pipe( pipe_slow ); 8554 %} 8555 8556 instruct rearrangeS(vec dst, vec shuffle) %{ 8557 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8558 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8559 match(Set dst (VectorRearrange dst shuffle)); 8560 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8561 ins_encode %{ 8562 assert(UseSSE >= 4, "required"); 8563 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8564 %} 8565 ins_pipe( pipe_slow ); 8566 %} 8567 8568 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2) %{ 8569 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8570 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8571 match(Set dst (VectorRearrange src shuffle)); 8572 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 8573 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
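// Worked example (illustrative, names are ours) of the byte mask built by loadShuffleS
// above: each 16-bit shuffle index k becomes the byte pair {2k, 2k+1}, so a plain byte
// shuffle (pshufb) moves both halves of every short.
#include <cstdint>
static inline void short_shuffle_to_byte_shuffle(const uint8_t* short_idx,
                                                 uint8_t* byte_idx, int lanes) {
  for (int i = 0; i < lanes; i++) {
    byte_idx[2 * i]     = (uint8_t)(2 * short_idx[i]);      // low byte of the lane
    byte_idx[2 * i + 1] = (uint8_t)(2 * short_idx[i] + 1);  // high byte of the lane
  }
}
// e.g. short indices {2,0,3,1} -> byte indices {4,5, 0,1, 6,7, 2,3}. The int/float and
// long/double variants further down build their masks in the same spirit, scaling by 4
// (adding 0..3 per byte) or by 2 double words (adding 0/1 per dword).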
using $vtmp1, $vtmp2 as TEMP" %} 8574 ins_encode %{ 8575 assert(UseAVX >= 2, "required"); 8576 // Swap src into vtmp1 8577 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8578 // Shuffle swapped src to get entries from other 128 bit lane 8579 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8580 // Shuffle original src to get entries from self 128 bit lane 8581 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8582 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8583 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, noreg); 8584 // Perform the blend 8585 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8586 %} 8587 ins_pipe( pipe_slow ); 8588 %} 8589 8590 instruct loadShuffleS_evex(vec dst, vec src) %{ 8591 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8592 VM_Version::supports_avx512bw()); 8593 match(Set dst (VectorLoadShuffle src)); 8594 format %{ "vector_load_shuffle $dst, $src" %} 8595 ins_encode %{ 8596 int vlen_enc = vector_length_encoding(this); 8597 if (!VM_Version::supports_avx512vl()) { 8598 vlen_enc = Assembler::AVX_512bit; 8599 } 8600 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8601 %} 8602 ins_pipe( pipe_slow ); 8603 %} 8604 8605 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8606 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8607 VM_Version::supports_avx512bw()); 8608 match(Set dst (VectorRearrange src shuffle)); 8609 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8610 ins_encode %{ 8611 int vlen_enc = vector_length_encoding(this); 8612 if (!VM_Version::supports_avx512vl()) { 8613 vlen_enc = Assembler::AVX_512bit; 8614 } 8615 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8616 %} 8617 ins_pipe( pipe_slow ); 8618 %} 8619 8620 // LoadShuffle/Rearrange for Integer and Float 8621 8622 instruct loadShuffleI(vec dst, vec src, vec vtmp) %{ 8623 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8624 Matcher::vector_length(n) == 4 && UseAVX < 2); 8625 match(Set dst (VectorLoadShuffle src)); 8626 effect(TEMP dst, TEMP vtmp); 8627 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8628 ins_encode %{ 8629 assert(UseSSE >= 4, "required"); 8630 8631 // Create a byte shuffle mask from int shuffle mask 8632 // only byte shuffle instruction available on these platforms 8633 8634 // Duplicate and multiply each shuffle by 4 8635 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8636 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8637 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8638 __ psllw($vtmp$$XMMRegister, 2); 8639 8640 // Duplicate again to create 4 copies of byte index 8641 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8642 __ psllw($dst$$XMMRegister, 8); 8643 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8644 8645 // Add 3,2,1,0 to get alternate byte index 8646 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), noreg); 8647 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8648 %} 8649 ins_pipe( pipe_slow ); 8650 %} 8651 8652 instruct rearrangeI(vec dst, vec shuffle) %{ 8653 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8654 Matcher::vector_length(n) == 4 && UseAVX < 2); 8655 match(Set dst (VectorRearrange dst shuffle)); 8656 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8657 ins_encode %{ 8658 assert(UseSSE >= 4, "required"); 8659 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8660 %} 8661 ins_pipe( pipe_slow ); 8662 %} 8663 8664 instruct loadShuffleI_avx(vec dst, vec src) %{ 8665 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8666 UseAVX >= 2); 8667 match(Set dst (VectorLoadShuffle src)); 8668 format %{ "vector_load_shuffle $dst, $src" %} 8669 ins_encode %{ 8670 int vlen_enc = vector_length_encoding(this); 8671 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8672 %} 8673 ins_pipe( pipe_slow ); 8674 %} 8675 8676 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8677 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8678 UseAVX >= 2); 8679 match(Set dst (VectorRearrange src shuffle)); 8680 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8681 ins_encode %{ 8682 int vlen_enc = vector_length_encoding(this); 8683 if (vlen_enc == Assembler::AVX_128bit) { 8684 vlen_enc = Assembler::AVX_256bit; 8685 } 8686 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8687 %} 8688 ins_pipe( pipe_slow ); 8689 %} 8690 8691 // LoadShuffle/Rearrange for Long and Double 8692 8693 instruct loadShuffleL(vec dst, vec src, vec vtmp) %{ 8694 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8695 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8696 match(Set dst (VectorLoadShuffle src)); 8697 effect(TEMP dst, TEMP vtmp); 8698 format %{ "vector_load_shuffle $dst, $src\t! 
using $vtmp as TEMP" %} 8699 ins_encode %{ 8700 assert(UseAVX >= 2, "required"); 8701 8702 int vlen_enc = vector_length_encoding(this); 8703 // Create a double word shuffle mask from long shuffle mask 8704 // only double word shuffle instruction available on these platforms 8705 8706 // Multiply each shuffle by two to get double word index 8707 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8708 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8709 8710 // Duplicate each double word shuffle 8711 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8712 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8713 8714 // Add one to get alternate double word index 8715 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, noreg); 8716 %} 8717 ins_pipe( pipe_slow ); 8718 %} 8719 8720 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8721 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8722 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8723 match(Set dst (VectorRearrange src shuffle)); 8724 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8725 ins_encode %{ 8726 assert(UseAVX >= 2, "required"); 8727 8728 int vlen_enc = vector_length_encoding(this); 8729 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8730 %} 8731 ins_pipe( pipe_slow ); 8732 %} 8733 8734 instruct loadShuffleL_evex(vec dst, vec src) %{ 8735 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8736 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8737 match(Set dst (VectorLoadShuffle src)); 8738 format %{ "vector_load_shuffle $dst, $src" %} 8739 ins_encode %{ 8740 assert(UseAVX > 2, "required"); 8741 8742 int vlen_enc = vector_length_encoding(this); 8743 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8744 %} 8745 ins_pipe( pipe_slow ); 8746 %} 8747 8748 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8749 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8750 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8751 match(Set dst (VectorRearrange src shuffle)); 8752 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8753 ins_encode %{ 8754 assert(UseAVX > 2, "required"); 8755 8756 int vlen_enc = vector_length_encoding(this); 8757 if (vlen_enc == Assembler::AVX_128bit) { 8758 vlen_enc = Assembler::AVX_256bit; 8759 } 8760 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8761 %} 8762 ins_pipe( pipe_slow ); 8763 %} 8764 8765 // --------------------------------- FMA -------------------------------------- 8766 // a * b + c 8767 8768 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8769 match(Set c (FmaVF c (Binary a b))); 8770 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8771 ins_cost(150); 8772 ins_encode %{ 8773 assert(UseFMA, "not enabled"); 8774 int vlen_enc = vector_length_encoding(this); 8775 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8776 %} 8777 ins_pipe( pipe_slow ); 8778 %} 8779 8780 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8781 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8782 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8783 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8784 ins_cost(150); 8785 ins_encode %{ 8786 assert(UseFMA, "not 
enabled"); 8787 int vlen_enc = vector_length_encoding(this); 8788 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8789 %} 8790 ins_pipe( pipe_slow ); 8791 %} 8792 8793 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8794 match(Set c (FmaVD c (Binary a b))); 8795 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8796 ins_cost(150); 8797 ins_encode %{ 8798 assert(UseFMA, "not enabled"); 8799 int vlen_enc = vector_length_encoding(this); 8800 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8801 %} 8802 ins_pipe( pipe_slow ); 8803 %} 8804 8805 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8806 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8807 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8808 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8809 ins_cost(150); 8810 ins_encode %{ 8811 assert(UseFMA, "not enabled"); 8812 int vlen_enc = vector_length_encoding(this); 8813 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8814 %} 8815 ins_pipe( pipe_slow ); 8816 %} 8817 8818 // --------------------------------- Vector Multiply Add -------------------------------------- 8819 8820 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8821 predicate(UseAVX == 0); 8822 match(Set dst (MulAddVS2VI dst src1)); 8823 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8824 ins_encode %{ 8825 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8826 %} 8827 ins_pipe( pipe_slow ); 8828 %} 8829 8830 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8831 predicate(UseAVX > 0); 8832 match(Set dst (MulAddVS2VI src1 src2)); 8833 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8834 ins_encode %{ 8835 int vlen_enc = vector_length_encoding(this); 8836 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8837 %} 8838 ins_pipe( pipe_slow ); 8839 %} 8840 8841 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8842 8843 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8844 predicate(VM_Version::supports_avx512_vnni()); 8845 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8846 format %{ "evpdpwssd $dst,$src1,$src2\t! 
muladdadd packedStoI" %} 8847 ins_encode %{ 8848 assert(UseAVX > 2, "required"); 8849 int vlen_enc = vector_length_encoding(this); 8850 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8851 %} 8852 ins_pipe( pipe_slow ); 8853 ins_cost(10); 8854 %} 8855 8856 // --------------------------------- PopCount -------------------------------------- 8857 8858 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{ 8859 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8860 match(Set dst (PopCountVI src)); 8861 match(Set dst (PopCountVL src)); 8862 format %{ "vector_popcount_integral $dst, $src" %} 8863 ins_encode %{ 8864 int opcode = this->ideal_Opcode(); 8865 int vlen_enc = vector_length_encoding(this, $src); 8866 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8867 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc); 8868 %} 8869 ins_pipe( pipe_slow ); 8870 %} 8871 8872 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{ 8873 predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8874 match(Set dst (PopCountVI src mask)); 8875 match(Set dst (PopCountVL src mask)); 8876 format %{ "vector_popcount_integral_masked $dst, $src, $mask" %} 8877 ins_encode %{ 8878 int vlen_enc = vector_length_encoding(this, $src); 8879 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8880 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8881 __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc); 8882 %} 8883 ins_pipe( pipe_slow ); 8884 %} 8885 8886 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{ 8887 predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1)))); 8888 match(Set dst (PopCountVI src)); 8889 match(Set dst (PopCountVL src)); 8890 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 8891 format %{ "vector_popcount_integral $dst, $src\t! 
using $xtmp1, $xtmp2, and $rtmp as TEMP" %} 8892 ins_encode %{ 8893 int opcode = this->ideal_Opcode(); 8894 int vlen_enc = vector_length_encoding(this, $src); 8895 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8896 __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8897 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc); 8898 %} 8899 ins_pipe( pipe_slow ); 8900 %} 8901 8902 // --------------------------------- Vector Trailing Zeros Count -------------------------------------- 8903 8904 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{ 8905 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 8906 Matcher::vector_length_in_bytes(n->in(1)))); 8907 match(Set dst (CountTrailingZerosV src)); 8908 effect(TEMP dst, TEMP xtmp, TEMP rtmp); 8909 ins_cost(400); 8910 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp and $rtmp as TEMP" %} 8911 ins_encode %{ 8912 int vlen_enc = vector_length_encoding(this, $src); 8913 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8914 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 8915 xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8916 %} 8917 ins_pipe( pipe_slow ); 8918 %} 8919 8920 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8921 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 8922 VM_Version::supports_avx512cd() && 8923 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 8924 match(Set dst (CountTrailingZerosV src)); 8925 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 8926 ins_cost(400); 8927 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %} 8928 ins_encode %{ 8929 int vlen_enc = vector_length_encoding(this, $src); 8930 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8931 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8932 $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc); 8933 %} 8934 ins_pipe( pipe_slow ); 8935 %} 8936 8937 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{ 8938 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 8939 match(Set dst (CountTrailingZerosV src)); 8940 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp); 8941 ins_cost(400); 8942 format %{ "vector_count_trailing_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %} 8943 ins_encode %{ 8944 int vlen_enc = vector_length_encoding(this, $src); 8945 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8946 __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8947 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 8948 $ktmp$$KRegister, $rtmp$$Register, vlen_enc); 8949 %} 8950 ins_pipe( pipe_slow ); 8951 %} 8952 8953 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 8954 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 8955 match(Set dst (CountTrailingZerosV src)); 8956 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 
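  // Note: without AVX-512 there is no direct vector trailing-zero-count instruction,
  // so the macro-assembler helper used below must synthesize it. One standard identity
  // it can build on is CTZ(x) = POPCNT((x & -x) - 1): isolating the lowest set bit and
  // subtracting one leaves exactly CTZ(x) one-bits (and for x == 0 the result is the
  // full element width). The exact instruction sequence is left to the helper.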
8957 format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8958 ins_encode %{ 8959 int vlen_enc = vector_length_encoding(this, $src); 8960 BasicType bt = Matcher::vector_element_basic_type(this, $src); 8961 __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 8962 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8963 %} 8964 ins_pipe( pipe_slow ); 8965 %} 8966 8967 8968 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 8969 8970 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 8971 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 8972 effect(TEMP dst); 8973 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8974 ins_encode %{ 8975 int vector_len = vector_length_encoding(this); 8976 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8977 %} 8978 ins_pipe( pipe_slow ); 8979 %} 8980 8981 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8982 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 8983 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8984 effect(TEMP dst); 8985 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8986 ins_encode %{ 8987 int vector_len = vector_length_encoding(this); 8988 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8989 %} 8990 ins_pipe( pipe_slow ); 8991 %} 8992 8993 // --------------------------------- Rotation Operations ---------------------------------- 8994 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8995 match(Set dst (RotateLeftV src shift)); 8996 match(Set dst (RotateRightV src shift)); 8997 format %{ "vprotate_imm8 $dst,$src,$shift\t! vector rotate" %} 8998 ins_encode %{ 8999 int opcode = this->ideal_Opcode(); 9000 int vector_len = vector_length_encoding(this); 9001 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9002 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 9003 %} 9004 ins_pipe( pipe_slow ); 9005 %} 9006 9007 instruct vprorate(vec dst, vec src, vec shift) %{ 9008 match(Set dst (RotateLeftV src shift)); 9009 match(Set dst (RotateRightV src shift)); 9010 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 9011 ins_encode %{ 9012 int opcode = this->ideal_Opcode(); 9013 int vector_len = vector_length_encoding(this); 9014 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 9015 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 9016 %} 9017 ins_pipe( pipe_slow ); 9018 %} 9019 9020 // ---------------------------------- Masked Operations ------------------------------------ 9021 instruct vmasked_load_avx_non_subword(vec dst, memory mem, vec mask) %{ 9022 predicate(!n->in(3)->bottom_type()->isa_vectmask()); 9023 match(Set dst (LoadVectorMasked mem mask)); 9024 format %{ "vector_masked_load $dst, $mem, $mask \t! 
vector masked copy" %} 9025 ins_encode %{ 9026 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9027 int vlen_enc = vector_length_encoding(this); 9028 __ vmovmask(elmType, $dst$$XMMRegister, $mem$$Address, $mask$$XMMRegister, vlen_enc); 9029 %} 9030 ins_pipe( pipe_slow ); 9031 %} 9032 9033 9034 instruct vmasked_load_evex(vec dst, memory mem, kReg mask) %{ 9035 predicate(n->in(3)->bottom_type()->isa_vectmask()); 9036 match(Set dst (LoadVectorMasked mem mask)); 9037 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 9038 ins_encode %{ 9039 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 9040 int vector_len = vector_length_encoding(this); 9041 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, false, vector_len); 9042 %} 9043 ins_pipe( pipe_slow ); 9044 %} 9045 9046 instruct vmasked_store_avx_non_subword(memory mem, vec src, vec mask) %{ 9047 predicate(!n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9048 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9049 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9050 ins_encode %{ 9051 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9052 int vlen_enc = vector_length_encoding(src_node); 9053 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9054 __ vmovmask(elmType, $mem$$Address, $src$$XMMRegister, $mask$$XMMRegister, vlen_enc); 9055 %} 9056 ins_pipe( pipe_slow ); 9057 %} 9058 9059 instruct vmasked_store_evex(memory mem, vec src, kReg mask) %{ 9060 predicate(n->in(3)->in(2)->bottom_type()->isa_vectmask()); 9061 match(Set mem (StoreVectorMasked mem (Binary src mask))); 9062 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 9063 ins_encode %{ 9064 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 9065 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 9066 int vlen_enc = vector_length_encoding(src_node); 9067 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, true, vlen_enc); 9068 %} 9069 ins_pipe( pipe_slow ); 9070 %} 9071 9072 #ifdef _LP64 9073 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 9074 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 9075 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 9076 format %{ "vector_mask_cmp $src1, $src2, $mask \t! 
vector mask comparison" %} 9077 ins_encode %{ 9078 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 9079 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 9080 9081 Label DONE; 9082 int vlen_enc = vector_length_encoding(this, $src1); 9083 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 9084 9085 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 9086 __ mov64($dst$$Register, -1L); 9087 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 9088 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 9089 __ jccb(Assembler::carrySet, DONE); 9090 __ kmovql($dst$$Register, $ktmp1$$KRegister); 9091 __ notq($dst$$Register); 9092 __ tzcntq($dst$$Register, $dst$$Register); 9093 __ bind(DONE); 9094 %} 9095 ins_pipe( pipe_slow ); 9096 %} 9097 9098 9099 instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 9100 match(Set dst (VectorMaskGen len)); 9101 effect(TEMP temp); 9102 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 9103 ins_encode %{ 9104 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 9105 %} 9106 ins_pipe( pipe_slow ); 9107 %} 9108 9109 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 9110 match(Set dst (VectorMaskGen len)); 9111 format %{ "vector_mask_gen $len \t! vector mask generator" %} 9112 effect(TEMP temp); 9113 ins_encode %{ 9114 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 9115 __ kmovql($dst$$KRegister, $temp$$Register); 9116 %} 9117 ins_pipe( pipe_slow ); 9118 %} 9119 9120 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 9121 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9122 match(Set dst (VectorMaskToLong mask)); 9123 effect(TEMP dst, KILL cr); 9124 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 9125 ins_encode %{ 9126 int opcode = this->ideal_Opcode(); 9127 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9128 int mask_len = Matcher::vector_length(this, $mask); 9129 int mask_size = mask_len * type2aelembytes(mbt); 9130 int vlen_enc = vector_length_encoding(this, $mask); 9131 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9132 $dst$$Register, mask_len, mask_size, vlen_enc); 9133 %} 9134 ins_pipe( pipe_slow ); 9135 %} 9136 9137 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 9138 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9139 match(Set dst (VectorMaskToLong mask)); 9140 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 9141 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9142 ins_encode %{ 9143 int opcode = this->ideal_Opcode(); 9144 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9145 int mask_len = Matcher::vector_length(this, $mask); 9146 int vlen_enc = vector_length_encoding(this, $mask); 9147 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9148 $dst$$Register, mask_len, mbt, vlen_enc); 9149 %} 9150 ins_pipe( pipe_slow ); 9151 %} 9152 9153 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 9154 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9155 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 9156 format %{ "vector_tolong_avx $dst, $mask \t! 
using $xtmp as TEMP" %} 9157 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 9158 ins_encode %{ 9159 int opcode = this->ideal_Opcode(); 9160 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9161 int mask_len = Matcher::vector_length(this, $mask); 9162 int vlen_enc = vector_length_encoding(this, $mask); 9163 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9164 $dst$$Register, mask_len, mbt, vlen_enc); 9165 %} 9166 ins_pipe( pipe_slow ); 9167 %} 9168 9169 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9170 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9171 match(Set dst (VectorMaskTrueCount mask)); 9172 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9173 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %} 9174 ins_encode %{ 9175 int opcode = this->ideal_Opcode(); 9176 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9177 int mask_len = Matcher::vector_length(this, $mask); 9178 int mask_size = mask_len * type2aelembytes(mbt); 9179 int vlen_enc = vector_length_encoding(this, $mask); 9180 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9181 $tmp$$Register, mask_len, mask_size, vlen_enc); 9182 %} 9183 ins_pipe( pipe_slow ); 9184 %} 9185 9186 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9187 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9188 match(Set dst (VectorMaskTrueCount mask)); 9189 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9190 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9191 ins_encode %{ 9192 int opcode = this->ideal_Opcode(); 9193 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9194 int mask_len = Matcher::vector_length(this, $mask); 9195 int vlen_enc = vector_length_encoding(this, $mask); 9196 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9197 $tmp$$Register, mask_len, mbt, vlen_enc); 9198 %} 9199 ins_pipe( pipe_slow ); 9200 %} 9201 9202 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9203 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9204 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 9205 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9206 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9207 ins_encode %{ 9208 int opcode = this->ideal_Opcode(); 9209 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9210 int mask_len = Matcher::vector_length(this, $mask); 9211 int vlen_enc = vector_length_encoding(this, $mask); 9212 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9213 $tmp$$Register, mask_len, mbt, vlen_enc); 9214 %} 9215 ins_pipe( pipe_slow ); 9216 %} 9217 9218 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 9219 predicate(n->in(1)->bottom_type()->isa_vectmask()); 9220 match(Set dst (VectorMaskFirstTrue mask)); 9221 match(Set dst (VectorMaskLastTrue mask)); 9222 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 9223 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! 
using $tmp as TEMP" %} 9224 ins_encode %{ 9225 int opcode = this->ideal_Opcode(); 9226 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9227 int mask_len = Matcher::vector_length(this, $mask); 9228 int mask_size = mask_len * type2aelembytes(mbt); 9229 int vlen_enc = vector_length_encoding(this, $mask); 9230 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 9231 $tmp$$Register, mask_len, mask_size, vlen_enc); 9232 %} 9233 ins_pipe( pipe_slow ); 9234 %} 9235 9236 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9237 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 9238 match(Set dst (VectorMaskFirstTrue mask)); 9239 match(Set dst (VectorMaskLastTrue mask)); 9240 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9241 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9242 ins_encode %{ 9243 int opcode = this->ideal_Opcode(); 9244 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9245 int mask_len = Matcher::vector_length(this, $mask); 9246 int vlen_enc = vector_length_encoding(this, $mask); 9247 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9248 $tmp$$Register, mask_len, mbt, vlen_enc); 9249 %} 9250 ins_pipe( pipe_slow ); 9251 %} 9252 9253 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 9254 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 9255 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 9256 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 9257 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 9258 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 9259 ins_encode %{ 9260 int opcode = this->ideal_Opcode(); 9261 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 9262 int mask_len = Matcher::vector_length(this, $mask); 9263 int vlen_enc = vector_length_encoding(this, $mask); 9264 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 9265 $tmp$$Register, mask_len, mbt, vlen_enc); 9266 %} 9267 ins_pipe( pipe_slow ); 9268 %} 9269 9270 // --------------------------------- Compress/Expand Operations --------------------------- 9271 9272 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{ 9273 match(Set dst (CompressV src mask)); 9274 match(Set dst (ExpandV src mask)); 9275 format %{ "vector_compress_expand $dst, $src, $mask" %} 9276 ins_encode %{ 9277 int opcode = this->ideal_Opcode(); 9278 int vector_len = vector_length_encoding(this); 9279 BasicType bt = Matcher::vector_element_basic_type(this); 9280 __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len); 9281 %} 9282 ins_pipe( pipe_slow ); 9283 %} 9284 9285 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{ 9286 match(Set dst (CompressM mask)); 9287 effect(TEMP rtmp1, TEMP rtmp2, KILL cr); 9288 format %{ "mask_compress_evex $dst, $mask\t! 
using $rtmp1 and $rtmp2 as TEMP" %} 9289 ins_encode %{ 9290 assert(this->in(1)->bottom_type()->isa_vectmask(), ""); 9291 int mask_len = Matcher::vector_length(this); 9292 __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len); 9293 %} 9294 ins_pipe( pipe_slow ); 9295 %} 9296 9297 #endif // _LP64 9298 9299 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------ 9300 9301 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9302 predicate(!VM_Version::supports_gfni()); 9303 match(Set dst (ReverseV src)); 9304 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9305 format %{ "vector_reverse_bit_evex $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9306 ins_encode %{ 9307 int vec_enc = vector_length_encoding(this); 9308 BasicType bt = Matcher::vector_element_basic_type(this); 9309 __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9310 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9311 %} 9312 ins_pipe( pipe_slow ); 9313 %} 9314 9315 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp) %{ 9316 predicate(VM_Version::supports_gfni()); 9317 match(Set dst (ReverseV src)); 9318 effect(TEMP dst, TEMP xtmp); 9319 format %{ "vector_reverse_bit_gfni $dst, $src!\t using $xtmp as TEMP" %} 9320 ins_encode %{ 9321 int vec_enc = vector_length_encoding(this); 9322 BasicType bt = Matcher::vector_element_basic_type(this); 9323 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1)); 9324 __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, addr, vec_enc, 9325 $xtmp$$XMMRegister); 9326 %} 9327 ins_pipe( pipe_slow ); 9328 %} 9329 9330 instruct vreverse_byte_reg(vec dst, vec src) %{ 9331 predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64); 9332 match(Set dst (ReverseBytesV src)); 9333 effect(TEMP dst); 9334 format %{ "vector_reverse_byte $dst, $src" %} 9335 ins_encode %{ 9336 int vec_enc = vector_length_encoding(this); 9337 BasicType bt = Matcher::vector_element_basic_type(this); 9338 __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, vec_enc); 9339 %} 9340 ins_pipe( pipe_slow ); 9341 %} 9342 9343 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{ 9344 predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64); 9345 match(Set dst (ReverseBytesV src)); 9346 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp); 9347 format %{ "vector_reverse_byte $dst, $src!\t using $xtmp1, $xtmp2 and $rtmp as TEMP" %} 9348 ins_encode %{ 9349 int vec_enc = vector_length_encoding(this); 9350 BasicType bt = Matcher::vector_element_basic_type(this); 9351 __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9352 $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc); 9353 %} 9354 ins_pipe( pipe_slow ); 9355 %} 9356 9357 // ---------------------------------- Vector Count Leading Zeros ----------------------------------- 9358 9359 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{ 9360 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9361 Matcher::vector_length_in_bytes(n->in(1)))); 9362 match(Set dst (CountLeadingZerosV src)); 9363 format %{ "vector_count_leading_zeros $dst, $src" %} 9364 ins_encode %{ 9365 int vlen_enc = vector_length_encoding(this, $src); 9366 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9367 __ 
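    // Note: for int and long elements AVX-512CD supplies vplzcntd/vplzcntq, so the
    // helper invoked here can count leading zeros with a single instruction per lane;
    // the xnoreg/k0/noreg arguments indicate that no temporaries or mask are needed
    // on this path.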
vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, 9368 xnoreg, xnoreg, k0, noreg, true, vlen_enc); 9369 %} 9370 ins_pipe( pipe_slow ); 9371 %} 9372 9373 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{ 9374 predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)), 9375 Matcher::vector_length_in_bytes(n->in(1)))); 9376 match(Set dst (CountLeadingZerosV src mask)); 9377 format %{ "vector_count_leading_zeros $dst, $src, $mask" %} 9378 ins_encode %{ 9379 int vlen_enc = vector_length_encoding(this, $src); 9380 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9381 __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 9382 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, 9383 xnoreg, $mask$$KRegister, noreg, true, vlen_enc); 9384 %} 9385 ins_pipe( pipe_slow ); 9386 %} 9387 9388 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{ 9389 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT && 9390 VM_Version::supports_avx512cd() && 9391 (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64)); 9392 match(Set dst (CountLeadingZerosV src)); 9393 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2); 9394 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1 and $xtmp2 as TEMP" %} 9395 ins_encode %{ 9396 int vlen_enc = vector_length_encoding(this, $src); 9397 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9398 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9399 $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc); 9400 %} 9401 ins_pipe( pipe_slow ); 9402 %} 9403 9404 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{ 9405 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw()); 9406 match(Set dst (CountLeadingZerosV src)); 9407 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp); 9408 format %{ "vector_count_leading_zeros $dst, $src!\t using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %} 9409 ins_encode %{ 9410 int vlen_enc = vector_length_encoding(this, $src); 9411 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9412 __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9413 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister, 9414 $rtmp$$Register, true, vlen_enc); 9415 %} 9416 ins_pipe( pipe_slow ); 9417 %} 9418 9419 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{ 9420 predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT && 9421 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9422 match(Set dst (CountLeadingZerosV src)); 9423 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3); 9424 format %{ "vector_count_leading_zeros $dst, $src\t! 
using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %} 9425 ins_encode %{ 9426 int vlen_enc = vector_length_encoding(this, $src); 9427 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9428 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9429 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc); 9430 %} 9431 ins_pipe( pipe_slow ); 9432 %} 9433 9434 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{ 9435 predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT && 9436 !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64); 9437 match(Set dst (CountLeadingZerosV src)); 9438 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp); 9439 format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 9440 ins_encode %{ 9441 int vlen_enc = vector_length_encoding(this, $src); 9442 BasicType bt = Matcher::vector_element_basic_type(this, $src); 9443 __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 9444 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 9445 %} 9446 ins_pipe( pipe_slow ); 9447 %} 9448 9449 // ---------------------------------- Vector Masked Operations ------------------------------------ 9450 9451 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 9452 match(Set dst (AddVB (Binary dst src2) mask)); 9453 match(Set dst (AddVS (Binary dst src2) mask)); 9454 match(Set dst (AddVI (Binary dst src2) mask)); 9455 match(Set dst (AddVL (Binary dst src2) mask)); 9456 match(Set dst (AddVF (Binary dst src2) mask)); 9457 match(Set dst (AddVD (Binary dst src2) mask)); 9458 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9459 ins_encode %{ 9460 int vlen_enc = vector_length_encoding(this); 9461 BasicType bt = Matcher::vector_element_basic_type(this); 9462 int opc = this->ideal_Opcode(); 9463 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9464 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9465 %} 9466 ins_pipe( pipe_slow ); 9467 %} 9468 9469 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 9470 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 9471 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 9472 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 9473 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 9474 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 9475 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9476 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9477 ins_encode %{ 9478 int vlen_enc = vector_length_encoding(this); 9479 BasicType bt = Matcher::vector_element_basic_type(this); 9480 int opc = this->ideal_Opcode(); 9481 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9482 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9483 %} 9484 ins_pipe( pipe_slow ); 9485 %} 9486 9487 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9488 match(Set dst (XorV (Binary dst src2) mask)); 9489 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9490 ins_encode %{ 9491 int vlen_enc = vector_length_encoding(this); 9492 BasicType bt = Matcher::vector_element_basic_type(this); 9493 int opc = this->ideal_Opcode(); 9494 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9495 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9496 %} 9497 ins_pipe( pipe_slow ); 9498 %} 9499 9500 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9501 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9502 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9503 ins_encode %{ 9504 int vlen_enc = vector_length_encoding(this); 9505 BasicType bt = Matcher::vector_element_basic_type(this); 9506 int opc = this->ideal_Opcode(); 9507 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9508 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9509 %} 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9514 match(Set dst (OrV (Binary dst src2) mask)); 9515 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9516 ins_encode %{ 9517 int vlen_enc = vector_length_encoding(this); 9518 BasicType bt = Matcher::vector_element_basic_type(this); 9519 int opc = this->ideal_Opcode(); 9520 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9521 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9522 %} 9523 ins_pipe( pipe_slow ); 9524 %} 9525 9526 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9527 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9528 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9529 ins_encode %{ 9530 int vlen_enc = vector_length_encoding(this); 9531 BasicType bt = Matcher::vector_element_basic_type(this); 9532 int opc = this->ideal_Opcode(); 9533 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9534 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9535 %} 9536 ins_pipe( pipe_slow ); 9537 %} 9538 9539 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9540 match(Set dst (AndV (Binary dst src2) mask)); 9541 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9542 ins_encode %{ 9543 int vlen_enc = vector_length_encoding(this); 9544 BasicType bt = Matcher::vector_element_basic_type(this); 9545 int opc = this->ideal_Opcode(); 9546 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9547 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9548 %} 9549 ins_pipe( pipe_slow ); 9550 %} 9551 9552 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9553 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9554 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9555 ins_encode %{ 9556 int vlen_enc = vector_length_encoding(this); 9557 BasicType bt = Matcher::vector_element_basic_type(this); 9558 int opc = this->ideal_Opcode(); 9559 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9560 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9561 %} 9562 ins_pipe( pipe_slow ); 9563 %} 9564 9565 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9566 match(Set dst (SubVB (Binary dst src2) mask)); 9567 match(Set dst (SubVS (Binary dst src2) mask)); 9568 match(Set dst (SubVI (Binary dst src2) mask)); 9569 match(Set dst (SubVL (Binary dst src2) mask)); 9570 match(Set dst (SubVF (Binary dst src2) mask)); 9571 match(Set dst (SubVD (Binary dst src2) mask)); 9572 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9573 ins_encode %{ 9574 int vlen_enc = vector_length_encoding(this); 9575 BasicType bt = Matcher::vector_element_basic_type(this); 9576 int opc = this->ideal_Opcode(); 9577 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9578 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9579 %} 9580 ins_pipe( pipe_slow ); 9581 %} 9582 9583 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9584 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9585 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9586 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9587 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9588 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9589 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9590 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9591 ins_encode %{ 9592 int vlen_enc = vector_length_encoding(this); 9593 BasicType bt = Matcher::vector_element_basic_type(this); 9594 int opc = this->ideal_Opcode(); 9595 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9596 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9597 %} 9598 ins_pipe( pipe_slow ); 9599 %} 9600 9601 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9602 match(Set dst (MulVS (Binary dst src2) mask)); 9603 match(Set dst (MulVI (Binary dst src2) mask)); 9604 match(Set dst (MulVL (Binary dst src2) mask)); 9605 match(Set dst (MulVF (Binary dst src2) mask)); 9606 match(Set dst (MulVD (Binary dst src2) mask)); 9607 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9608 ins_encode %{ 9609 int vlen_enc = vector_length_encoding(this); 9610 BasicType bt = Matcher::vector_element_basic_type(this); 9611 int opc = this->ideal_Opcode(); 9612 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9613 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9614 %} 9615 ins_pipe( pipe_slow ); 9616 %} 9617 9618 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9619 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9620 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9621 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9622 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9623 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9624 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9625 ins_encode %{ 9626 int vlen_enc = vector_length_encoding(this); 9627 BasicType bt = Matcher::vector_element_basic_type(this); 9628 int opc = this->ideal_Opcode(); 9629 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9630 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9631 %} 9632 ins_pipe( pipe_slow ); 9633 %} 9634 9635 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9636 match(Set dst (SqrtVF dst mask)); 9637 match(Set dst (SqrtVD dst mask)); 9638 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9639 ins_encode %{ 9640 int vlen_enc = vector_length_encoding(this); 9641 BasicType bt = Matcher::vector_element_basic_type(this); 9642 int opc = this->ideal_Opcode(); 9643 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9644 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9645 %} 9646 ins_pipe( pipe_slow ); 9647 %} 9648 9649 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9650 match(Set dst (DivVF (Binary dst src2) mask)); 9651 match(Set dst (DivVD (Binary dst src2) mask)); 9652 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9653 ins_encode %{ 9654 int vlen_enc = vector_length_encoding(this); 9655 BasicType bt = Matcher::vector_element_basic_type(this); 9656 int opc = this->ideal_Opcode(); 9657 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9658 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9659 %} 9660 ins_pipe( pipe_slow ); 9661 %} 9662 9663 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9664 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9665 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9666 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9667 ins_encode %{ 9668 int vlen_enc = vector_length_encoding(this); 9669 BasicType bt = Matcher::vector_element_basic_type(this); 9670 int opc = this->ideal_Opcode(); 9671 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9672 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9673 %} 9674 ins_pipe( pipe_slow ); 9675 %} 9676 9677 9678 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9679 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9680 match(Set dst (RotateRightV (Binary dst shift) mask)); 9681 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9682 ins_encode %{ 9683 int vlen_enc = vector_length_encoding(this); 9684 BasicType bt = Matcher::vector_element_basic_type(this); 9685 int opc = this->ideal_Opcode(); 9686 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9687 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9688 %} 9689 ins_pipe( pipe_slow ); 9690 %} 9691 9692 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9693 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9694 match(Set dst (RotateRightV (Binary dst src2) mask)); 9695 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9696 ins_encode %{ 9697 int vlen_enc = vector_length_encoding(this); 9698 BasicType bt = Matcher::vector_element_basic_type(this); 9699 int opc = this->ideal_Opcode(); 9700 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9701 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9702 %} 9703 ins_pipe( pipe_slow ); 9704 %} 9705 9706 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9707 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9708 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9709 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9710 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9711 ins_encode %{ 9712 int vlen_enc = vector_length_encoding(this); 9713 BasicType bt = Matcher::vector_element_basic_type(this); 9714 int opc = this->ideal_Opcode(); 9715 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9716 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9717 %} 9718 ins_pipe( pipe_slow ); 9719 %} 9720 9721 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9722 predicate(!n->as_ShiftV()->is_var_shift()); 9723 match(Set dst (LShiftVS (Binary dst src2) mask)); 9724 match(Set dst (LShiftVI (Binary dst src2) mask)); 9725 match(Set dst (LShiftVL (Binary dst src2) mask)); 9726 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9727 ins_encode %{ 9728 int vlen_enc = vector_length_encoding(this); 9729 BasicType bt = Matcher::vector_element_basic_type(this); 9730 int opc = this->ideal_Opcode(); 9731 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9732 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9733 %} 9734 ins_pipe( pipe_slow ); 9735 %} 9736 9737 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9738 predicate(n->as_ShiftV()->is_var_shift()); 9739 match(Set dst (LShiftVS (Binary dst src2) mask)); 9740 match(Set dst (LShiftVI (Binary dst src2) mask)); 9741 match(Set dst (LShiftVL (Binary dst src2) mask)); 9742 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9743 ins_encode %{ 9744 int vlen_enc = vector_length_encoding(this); 9745 BasicType bt = Matcher::vector_element_basic_type(this); 9746 int opc = this->ideal_Opcode(); 9747 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9748 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9749 %} 9750 ins_pipe( pipe_slow ); 9751 %} 9752 9753 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9754 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9755 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9756 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9757 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9758 ins_encode %{ 9759 int vlen_enc = vector_length_encoding(this); 9760 BasicType bt = Matcher::vector_element_basic_type(this); 9761 int opc = this->ideal_Opcode(); 9762 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9763 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9764 %} 9765 ins_pipe( pipe_slow ); 9766 %} 9767 9768 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9769 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9770 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9771 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9772 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! 
rshift masked operation" %} 9773 ins_encode %{ 9774 int vlen_enc = vector_length_encoding(this); 9775 BasicType bt = Matcher::vector_element_basic_type(this); 9776 int opc = this->ideal_Opcode(); 9777 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9778 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9779 %} 9780 ins_pipe( pipe_slow ); 9781 %} 9782 9783 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9784 predicate(!n->as_ShiftV()->is_var_shift()); 9785 match(Set dst (RShiftVS (Binary dst src2) mask)); 9786 match(Set dst (RShiftVI (Binary dst src2) mask)); 9787 match(Set dst (RShiftVL (Binary dst src2) mask)); 9788 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9789 ins_encode %{ 9790 int vlen_enc = vector_length_encoding(this); 9791 BasicType bt = Matcher::vector_element_basic_type(this); 9792 int opc = this->ideal_Opcode(); 9793 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9794 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9795 %} 9796 ins_pipe( pipe_slow ); 9797 %} 9798 9799 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9800 predicate(n->as_ShiftV()->is_var_shift()); 9801 match(Set dst (RShiftVS (Binary dst src2) mask)); 9802 match(Set dst (RShiftVI (Binary dst src2) mask)); 9803 match(Set dst (RShiftVL (Binary dst src2) mask)); 9804 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9805 ins_encode %{ 9806 int vlen_enc = vector_length_encoding(this); 9807 BasicType bt = Matcher::vector_element_basic_type(this); 9808 int opc = this->ideal_Opcode(); 9809 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9810 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9811 %} 9812 ins_pipe( pipe_slow ); 9813 %} 9814 9815 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9816 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9817 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9818 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9819 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9820 ins_encode %{ 9821 int vlen_enc = vector_length_encoding(this); 9822 BasicType bt = Matcher::vector_element_basic_type(this); 9823 int opc = this->ideal_Opcode(); 9824 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9825 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9826 %} 9827 ins_pipe( pipe_slow ); 9828 %} 9829 9830 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9831 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9832 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9833 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9834 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 9835 ins_encode %{ 9836 int vlen_enc = vector_length_encoding(this); 9837 BasicType bt = Matcher::vector_element_basic_type(this); 9838 int opc = this->ideal_Opcode(); 9839 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9840 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9841 %} 9842 ins_pipe( pipe_slow ); 9843 %} 9844 9845 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9846 predicate(!n->as_ShiftV()->is_var_shift()); 9847 match(Set dst (URShiftVS (Binary dst src2) mask)); 9848 match(Set dst (URShiftVI (Binary dst src2) mask)); 9849 match(Set dst (URShiftVL (Binary dst src2) mask)); 9850 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9851 ins_encode %{ 9852 int vlen_enc = vector_length_encoding(this); 9853 BasicType bt = Matcher::vector_element_basic_type(this); 9854 int opc = this->ideal_Opcode(); 9855 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9856 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9857 %} 9858 ins_pipe( pipe_slow ); 9859 %} 9860 9861 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9862 predicate(n->as_ShiftV()->is_var_shift()); 9863 match(Set dst (URShiftVS (Binary dst src2) mask)); 9864 match(Set dst (URShiftVI (Binary dst src2) mask)); 9865 match(Set dst (URShiftVL (Binary dst src2) mask)); 9866 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9867 ins_encode %{ 9868 int vlen_enc = vector_length_encoding(this); 9869 BasicType bt = Matcher::vector_element_basic_type(this); 9870 int opc = this->ideal_Opcode(); 9871 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9872 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9873 %} 9874 ins_pipe( pipe_slow ); 9875 %} 9876 9877 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9878 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9879 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9880 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9881 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9882 ins_encode %{ 9883 int vlen_enc = vector_length_encoding(this); 9884 BasicType bt = Matcher::vector_element_basic_type(this); 9885 int opc = this->ideal_Opcode(); 9886 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9887 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9888 %} 9889 ins_pipe( pipe_slow ); 9890 %} 9891 9892 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9893 match(Set dst (MaxV (Binary dst src2) mask)); 9894 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9895 ins_encode %{ 9896 int vlen_enc = vector_length_encoding(this); 9897 BasicType bt = Matcher::vector_element_basic_type(this); 9898 int opc = this->ideal_Opcode(); 9899 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9900 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9901 %} 9902 ins_pipe( pipe_slow ); 9903 %} 9904 9905 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9906 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9907 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9908 ins_encode %{ 9909 int vlen_enc = vector_length_encoding(this); 9910 BasicType bt = Matcher::vector_element_basic_type(this); 9911 int opc = this->ideal_Opcode(); 9912 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9913 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9914 %} 9915 ins_pipe( pipe_slow ); 9916 %} 9917 9918 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9919 match(Set dst (MinV (Binary dst src2) mask)); 9920 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9921 ins_encode %{ 9922 int vlen_enc = vector_length_encoding(this); 9923 BasicType bt = Matcher::vector_element_basic_type(this); 9924 int opc = this->ideal_Opcode(); 9925 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9926 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9927 %} 9928 ins_pipe( pipe_slow ); 9929 %} 9930 9931 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9932 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9933 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9934 ins_encode %{ 9935 int vlen_enc = vector_length_encoding(this); 9936 BasicType bt = Matcher::vector_element_basic_type(this); 9937 int opc = this->ideal_Opcode(); 9938 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9939 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9940 %} 9941 ins_pipe( pipe_slow ); 9942 %} 9943 9944 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9945 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9946 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9947 ins_encode %{ 9948 int vlen_enc = vector_length_encoding(this); 9949 BasicType bt = Matcher::vector_element_basic_type(this); 9950 int opc = this->ideal_Opcode(); 9951 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9952 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9953 %} 9954 ins_pipe( pipe_slow ); 9955 %} 9956 9957 instruct vabs_masked(vec dst, kReg mask) %{ 9958 match(Set dst (AbsVB dst mask)); 9959 match(Set dst (AbsVS dst mask)); 9960 match(Set dst (AbsVI dst mask)); 9961 match(Set dst (AbsVL dst mask)); 9962 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9963 ins_encode %{ 9964 int vlen_enc = vector_length_encoding(this); 9965 BasicType bt = Matcher::vector_element_basic_type(this); 9966 int opc = this->ideal_Opcode(); 9967 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9968 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9969 %} 9970 ins_pipe( pipe_slow ); 9971 %} 9972 9973 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9974 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9975 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9976 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9977 ins_encode %{ 9978 int vlen_enc = vector_length_encoding(this); 9979 BasicType bt = Matcher::vector_element_basic_type(this); 9980 int opc = this->ideal_Opcode(); 9981 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9982 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9983 %} 9984 ins_pipe( pipe_slow ); 9985 %} 9986 9987 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9988 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9989 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9990 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9991 ins_encode %{ 9992 int vlen_enc = vector_length_encoding(this); 9993 BasicType bt = Matcher::vector_element_basic_type(this); 9994 int opc = this->ideal_Opcode(); 9995 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9996 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9997 %} 9998 ins_pipe( pipe_slow ); 9999 %} 10000 10001 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask) %{ 10002 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 10003 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask" %} 10004 ins_encode %{ 10005 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 10006 int vlen_enc = vector_length_encoding(this, $src1); 10007 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 10008 10009 // Comparison i 10010 switch (src1_elem_bt) { 10011 case T_BYTE: { 10012 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 10013 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10014 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10015 break; 10016 } 10017 case T_SHORT: { 10018 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 10019 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10020 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10021 break; 10022 } 10023 case T_INT: { 10024 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 10025 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10026 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10027 break; 10028 } 10029 case T_LONG: { 10030 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 10031 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 10032 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 10033 break; 10034 } 10035 case T_FLOAT: { 10036 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10037 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10038 break; 10039 } 10040 case T_DOUBLE: { 10041 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 10042 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 10043 break; 10044 } 10045 default: assert(false, "%s", type2name(src1_elem_bt)); break; 10046 } 10047 %} 10048 ins_pipe( pipe_slow ); 10049 %} 10050 10051 instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{ 10052 
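// Note: MaskAll replicates a scalar boolean condition across every lane of a vector
// mask, e.g. for a 16-lane mask a source value of -1 yields 0xFFFF and 0 yields 0x0000.
// The rule below covers mask lengths of at most 32 lanes; longer (64-lane byte) masks
// are expected to be handled by a separate rule.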
instruct mask_all_evexI_LE32(kReg dst, rRegI src) %{
  predicate(Matcher::vector_length(n) <= 32);
  match(Set dst (MaskAll src));
  format %{ "mask_all_evexI_LE32 $dst, $src \t" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
  predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
  match(Set dst (XorVMask src (MaskAll cnt)));
  effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
  format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

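// Note: the two rules above cover targets where the mask lives in a general vector as
// one boolean byte per lane; the 64-bit source is expanded lane by lane through the
// general-purpose and XMM temporaries. When a true predicate register type is in use
// (isa_vectmask() below), no expansion is needed and the long is simply moved into an
// opmask register.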

instruct long_to_mask_evex(kReg dst, rRegL src) %{
  predicate(n->bottom_type()->isa_vectmask());
  match(Set dst (VectorLongToMask src));
  format %{ "long_to_mask_evex $dst, $src\t!" %}
  ins_encode %{
    __ kmov($dst$$KRegister, $src$$Register);
  %}
  ins_pipe( pipe_slow );
%}
#endif

instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  match(Set dst (AndVMask src1 src2));
  match(Set dst (OrVMask src1 src2));
  match(Set dst (XorVMask src1 src2));
  effect(TEMP kscratch);
  format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
  ins_encode %{
    const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
    const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
    assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "mask operands must have the same type");
    uint masklen = Matcher::vector_length(this);
    // k-register operations narrower than 16 bits need AVX512DQ; without it, widen to the 16-bit form.
    masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
    __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
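
// Note (editorial, illustrative only): vpternlog interprets $func as an 8-bit truth
// table applied bitwise to (dst, src2, src3); for instance func == 0x96 computes
// dst ^ src2 ^ src3. With the kReg operand and merge semantics, the masked rules
// around this note are expected to encode roughly
//   vpternlogd zmm_dst {k_mask}, zmm_src2, zmm_src3, imm8($func)
// with the element width and vector length chosen from bt and vlen_enc; this is a
// sketch, not the literal emitted code.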

instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct castMM(kReg dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVV(vec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct castVVLeg(legVec dst)
%{
  match(Set dst (CastVV dst));

  size(0);
  format %{ "# castVV of $dst" %}
  ins_encode(/* empty encoding */);
  ins_cost(0);
  ins_pipe(empty);
%}

instruct FloatClassCheck_reg_reg_vfpclass(rRegI dst, regF src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteF src));
  effect(TEMP ktmp, KILL cr);
  format %{ "float_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclassss($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}

instruct DoubleClassCheck_reg_reg_vfpclass(rRegI dst, regD src, kReg ktmp, rFlagsReg cr)
%{
  match(Set dst (IsInfiniteD src));
  effect(TEMP ktmp, KILL cr);
  format %{ "double_class_check $dst, $src" %}
  ins_encode %{
    __ vfpclasssd($ktmp$$KRegister, $src$$XMMRegister, 0x18);
    __ kmovbl($dst$$Register, $ktmp$$KRegister);
  %}
  ins_pipe(pipe_slow);
%}
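
// Note (editorial): in the two class-check rules above, the 0x18 immediate passed to
// vfpclassss/vfpclasssd selects the "+infinity" and "-infinity" categories (imm8
// bits 3 and 4 of the AVX-512 VFPCLASS encoding), so $ktmp is set exactly when
// $src is +/-Inf; kmovbl then copies that bit into the integer result. This is an
// explanatory reading of the immediate, not a change to the emitted code.
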