//
// Copyright (c) 2011, 2022, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// X86 Common Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.

register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// XMM registers.  512-bit registers, 16 words each, labeled (a)-(p).
// Word (a) in each register holds a Float; words (a) and (b) together hold a Double.
// The whole registers are used in SSE4.2 version intrinsics,
// array copy stubs and superword operations (see UseSSE42Intrinsics,
// UseXMMForArrayCopy and UseSuperword flags).
// For pre-EVEX architectures:
//      XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
// For EVEX enabled architectures:
//      XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
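//
// For example, the entry
//   reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
// below declares word (c), the third 32-bit slot of the 512-bit xmm0 register,
// as Save-On-Call for both the register save type and the C convention save
// type, with ideal type Op_RegF, hardware encoding 0, and a VMReg located two
// slots past xmm0's base VMReg.  Every XMM entry below repeats this pattern
// for its sixteen words (a)-(p).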
71 // 72 // Linux ABI: No register preserved across function calls 73 // XMM0-XMM7 might hold parameters 74 // Windows ABI: XMM6-XMM31 preserved across function calls 75 // XMM0-XMM3 might hold parameters 76 77 reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()); 78 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1)); 79 reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2)); 80 reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3)); 81 reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4)); 82 reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5)); 83 reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6)); 84 reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7)); 85 reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8)); 86 reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9)); 87 reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10)); 88 reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11)); 89 reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12)); 90 reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13)); 91 reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14)); 92 reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15)); 93 94 reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()); 95 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1)); 96 reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2)); 97 reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3)); 98 reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4)); 99 reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5)); 100 reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6)); 101 reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7)); 102 reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8)); 103 reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9)); 104 reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10)); 105 reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11)); 106 reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12)); 107 reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13)); 108 reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14)); 109 reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15)); 110 111 reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()); 112 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1)); 113 reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2)); 114 reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3)); 115 reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4)); 116 reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5)); 117 reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6)); 118 reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7)); 119 reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8)); 120 reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9)); 121 reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10)); 122 reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11)); 123 reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12)); 124 reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13)); 125 reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14)); 126 reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15)); 127 128 reg_def XMM3 ( SOC, SOC, Op_RegF, 3, 
xmm3->as_VMReg()); 129 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1)); 130 reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2)); 131 reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3)); 132 reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4)); 133 reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5)); 134 reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6)); 135 reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7)); 136 reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8)); 137 reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9)); 138 reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10)); 139 reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11)); 140 reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12)); 141 reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13)); 142 reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14)); 143 reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15)); 144 145 reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()); 146 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1)); 147 reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2)); 148 reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3)); 149 reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4)); 150 reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5)); 151 reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6)); 152 reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7)); 153 reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8)); 154 reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9)); 155 reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10)); 156 reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11)); 157 reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12)); 158 reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13)); 159 reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14)); 160 reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15)); 161 162 reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()); 163 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1)); 164 reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2)); 165 reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3)); 166 reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4)); 167 reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5)); 168 reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6)); 169 reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7)); 170 reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8)); 171 reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9)); 172 reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10)); 173 reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11)); 174 reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12)); 175 reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13)); 176 reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14)); 177 reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15)); 178 179 reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()); 180 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1)); 181 reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2)); 182 reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3)); 183 reg_def 
XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4)); 184 reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5)); 185 reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6)); 186 reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7)); 187 reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8)); 188 reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9)); 189 reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10)); 190 reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11)); 191 reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12)); 192 reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13)); 193 reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14)); 194 reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15)); 195 196 reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()); 197 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1)); 198 reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2)); 199 reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3)); 200 reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4)); 201 reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5)); 202 reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6)); 203 reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7)); 204 reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8)); 205 reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9)); 206 reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10)); 207 reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11)); 208 reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12)); 209 reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13)); 210 reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14)); 211 reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15)); 212 213 #ifdef _LP64 214 215 reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()); 216 reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1)); 217 reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2)); 218 reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3)); 219 reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4)); 220 reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5)); 221 reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6)); 222 reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7)); 223 reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8)); 224 reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9)); 225 reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10)); 226 reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11)); 227 reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12)); 228 reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13)); 229 reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14)); 230 reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15)); 231 232 reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()); 233 reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1)); 234 reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2)); 235 reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3)); 236 reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4)); 237 reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5)); 238 reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6)); 239 reg_def XMM9h( SOC, 
SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7)); 240 reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8)); 241 reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9)); 242 reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10)); 243 reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11)); 244 reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12)); 245 reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13)); 246 reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14)); 247 reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15)); 248 249 reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()); 250 reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1)); 251 reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2)); 252 reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3)); 253 reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4)); 254 reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5)); 255 reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6)); 256 reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7)); 257 reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8)); 258 reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9)); 259 reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10)); 260 reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11)); 261 reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12)); 262 reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13)); 263 reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14)); 264 reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15)); 265 266 reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()); 267 reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1)); 268 reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2)); 269 reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3)); 270 reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4)); 271 reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5)); 272 reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6)); 273 reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7)); 274 reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8)); 275 reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9)); 276 reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10)); 277 reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11)); 278 reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12)); 279 reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13)); 280 reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14)); 281 reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15)); 282 283 reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()); 284 reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1)); 285 reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2)); 286 reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3)); 287 reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4)); 288 reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5)); 289 reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6)); 290 reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7)); 291 reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8)); 
292 reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9)); 293 reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10)); 294 reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11)); 295 reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12)); 296 reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13)); 297 reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14)); 298 reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15)); 299 300 reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()); 301 reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1)); 302 reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2)); 303 reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3)); 304 reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4)); 305 reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5)); 306 reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6)); 307 reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7)); 308 reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8)); 309 reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9)); 310 reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10)); 311 reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11)); 312 reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12)); 313 reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13)); 314 reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14)); 315 reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15)); 316 317 reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()); 318 reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1)); 319 reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2)); 320 reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3)); 321 reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4)); 322 reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5)); 323 reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6)); 324 reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7)); 325 reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8)); 326 reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9)); 327 reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10)); 328 reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11)); 329 reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12)); 330 reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13)); 331 reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14)); 332 reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15)); 333 334 reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()); 335 reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1)); 336 reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2)); 337 reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3)); 338 reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4)); 339 reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5)); 340 reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6)); 341 reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7)); 342 reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8)); 343 reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9)); 344 reg_def XMM15k( 
SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10)); 345 reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11)); 346 reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12)); 347 reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13)); 348 reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14)); 349 reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15)); 350 351 reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()); 352 reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1)); 353 reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2)); 354 reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3)); 355 reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4)); 356 reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5)); 357 reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6)); 358 reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7)); 359 reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8)); 360 reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9)); 361 reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10)); 362 reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11)); 363 reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12)); 364 reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13)); 365 reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14)); 366 reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15)); 367 368 reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()); 369 reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1)); 370 reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2)); 371 reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3)); 372 reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4)); 373 reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5)); 374 reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6)); 375 reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7)); 376 reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8)); 377 reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9)); 378 reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10)); 379 reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11)); 380 reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12)); 381 reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13)); 382 reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14)); 383 reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15)); 384 385 reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()); 386 reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1)); 387 reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2)); 388 reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3)); 389 reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4)); 390 reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5)); 391 reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6)); 392 reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7)); 393 reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8)); 394 reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9)); 395 reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10)); 396 reg_def XMM18l( SOC, SOC, Op_RegF, 18, 
xmm18->as_VMReg()->next(11)); 397 reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12)); 398 reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13)); 399 reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14)); 400 reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15)); 401 402 reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()); 403 reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1)); 404 reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2)); 405 reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3)); 406 reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4)); 407 reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5)); 408 reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6)); 409 reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7)); 410 reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8)); 411 reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9)); 412 reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10)); 413 reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11)); 414 reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12)); 415 reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13)); 416 reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14)); 417 reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15)); 418 419 reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()); 420 reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1)); 421 reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2)); 422 reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3)); 423 reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4)); 424 reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5)); 425 reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6)); 426 reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7)); 427 reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8)); 428 reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9)); 429 reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10)); 430 reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11)); 431 reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12)); 432 reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13)); 433 reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14)); 434 reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15)); 435 436 reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()); 437 reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1)); 438 reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2)); 439 reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3)); 440 reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4)); 441 reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5)); 442 reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6)); 443 reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7)); 444 reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8)); 445 reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9)); 446 reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10)); 447 reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11)); 448 reg_def XMM21m( SOC, SOC, Op_RegF, 21, 
xmm21->as_VMReg()->next(12)); 449 reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13)); 450 reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14)); 451 reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15)); 452 453 reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()); 454 reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1)); 455 reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2)); 456 reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3)); 457 reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4)); 458 reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5)); 459 reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6)); 460 reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7)); 461 reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8)); 462 reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9)); 463 reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10)); 464 reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11)); 465 reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12)); 466 reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13)); 467 reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14)); 468 reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15)); 469 470 reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()); 471 reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1)); 472 reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2)); 473 reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3)); 474 reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4)); 475 reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5)); 476 reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6)); 477 reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7)); 478 reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8)); 479 reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9)); 480 reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10)); 481 reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11)); 482 reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12)); 483 reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13)); 484 reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14)); 485 reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15)); 486 487 reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()); 488 reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1)); 489 reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2)); 490 reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3)); 491 reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4)); 492 reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5)); 493 reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6)); 494 reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7)); 495 reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8)); 496 reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9)); 497 reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10)); 498 reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11)); 499 reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12)); 500 reg_def XMM24n( SOC, SOC, Op_RegF, 24, 
xmm24->as_VMReg()->next(13)); 501 reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14)); 502 reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15)); 503 504 reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()); 505 reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1)); 506 reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2)); 507 reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3)); 508 reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4)); 509 reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5)); 510 reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6)); 511 reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7)); 512 reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8)); 513 reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9)); 514 reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10)); 515 reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11)); 516 reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12)); 517 reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13)); 518 reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14)); 519 reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15)); 520 521 reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()); 522 reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1)); 523 reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2)); 524 reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3)); 525 reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4)); 526 reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5)); 527 reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6)); 528 reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7)); 529 reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8)); 530 reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9)); 531 reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10)); 532 reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11)); 533 reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12)); 534 reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13)); 535 reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14)); 536 reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15)); 537 538 reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()); 539 reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1)); 540 reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2)); 541 reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3)); 542 reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4)); 543 reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5)); 544 reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6)); 545 reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7)); 546 reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8)); 547 reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9)); 548 reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10)); 549 reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11)); 550 reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12)); 551 reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13)); 552 reg_def XMM27o( SOC, SOC, Op_RegF, 27, 
xmm27->as_VMReg()->next(14)); 553 reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15)); 554 555 reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()); 556 reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1)); 557 reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2)); 558 reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3)); 559 reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4)); 560 reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5)); 561 reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6)); 562 reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7)); 563 reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8)); 564 reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9)); 565 reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10)); 566 reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11)); 567 reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12)); 568 reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13)); 569 reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14)); 570 reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15)); 571 572 reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()); 573 reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1)); 574 reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2)); 575 reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3)); 576 reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4)); 577 reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5)); 578 reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6)); 579 reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7)); 580 reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8)); 581 reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9)); 582 reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10)); 583 reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11)); 584 reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12)); 585 reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13)); 586 reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14)); 587 reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15)); 588 589 reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()); 590 reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1)); 591 reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2)); 592 reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3)); 593 reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4)); 594 reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5)); 595 reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6)); 596 reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7)); 597 reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8)); 598 reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9)); 599 reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10)); 600 reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11)); 601 reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12)); 602 reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13)); 603 reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14)); 604 reg_def XMM30p( SOC, SOC, Op_RegF, 30, 
xmm30->as_VMReg()->next(15)); 605 606 reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()); 607 reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1)); 608 reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2)); 609 reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3)); 610 reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4)); 611 reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5)); 612 reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6)); 613 reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7)); 614 reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8)); 615 reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9)); 616 reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10)); 617 reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11)); 618 reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12)); 619 reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13)); 620 reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14)); 621 reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15)); 622 623 #endif // _LP64 624 625 #ifdef _LP64 626 reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad()); 627 #else 628 reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad()); 629 #endif // _LP64 630 631 // AVX3 Mask Registers. 632 reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg()); 633 reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next()); 634 635 reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg()); 636 reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next()); 637 638 reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg()); 639 reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next()); 640 641 reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg()); 642 reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next()); 643 644 reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg()); 645 reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next()); 646 647 reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg()); 648 reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next()); 649 650 reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg()); 651 reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next()); 652 653 654 alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 655 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 656 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 657 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 658 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 659 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 660 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 661 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 662 #ifdef _LP64 663 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 664 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 665 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, 
XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 666 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 667 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 668 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 669 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 670 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 671 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 672 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 673 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 674 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 675 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 676 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 677 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 678 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 679 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 680 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 681 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 682 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 683 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 684 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 685 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 686 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 687 #endif 688 ); 689 690 alloc_class chunk2(K7, K7_H, 691 K6, K6_H, 692 K5, K5_H, 693 K4, K4_H, 694 K3, K3_H, 695 K2, K2_H, 696 K1, K1_H); 697 698 reg_class vectmask_reg(K1, K1_H, 699 K2, K2_H, 700 K3, K3_H, 701 K4, K4_H, 702 K5, K5_H, 703 K6, K6_H, 704 K7, K7_H); 705 706 reg_class vectmask_reg_K1(K1, K1_H); 707 reg_class vectmask_reg_K2(K2, K2_H); 708 reg_class vectmask_reg_K3(K3, K3_H); 709 reg_class vectmask_reg_K4(K4, K4_H); 710 reg_class vectmask_reg_K5(K5, K5_H); 711 reg_class vectmask_reg_K6(K6, K6_H); 712 reg_class vectmask_reg_K7(K7, K7_H); 713 714 // flags allocation class should be last. 
715 alloc_class chunk3(RFLAGS); 716 717 718 // Singleton class for condition codes 719 reg_class int_flags(RFLAGS); 720 721 // Class for pre evex float registers 722 reg_class float_reg_legacy(XMM0, 723 XMM1, 724 XMM2, 725 XMM3, 726 XMM4, 727 XMM5, 728 XMM6, 729 XMM7 730 #ifdef _LP64 731 ,XMM8, 732 XMM9, 733 XMM10, 734 XMM11, 735 XMM12, 736 XMM13, 737 XMM14, 738 XMM15 739 #endif 740 ); 741 742 // Class for evex float registers 743 reg_class float_reg_evex(XMM0, 744 XMM1, 745 XMM2, 746 XMM3, 747 XMM4, 748 XMM5, 749 XMM6, 750 XMM7 751 #ifdef _LP64 752 ,XMM8, 753 XMM9, 754 XMM10, 755 XMM11, 756 XMM12, 757 XMM13, 758 XMM14, 759 XMM15, 760 XMM16, 761 XMM17, 762 XMM18, 763 XMM19, 764 XMM20, 765 XMM21, 766 XMM22, 767 XMM23, 768 XMM24, 769 XMM25, 770 XMM26, 771 XMM27, 772 XMM28, 773 XMM29, 774 XMM30, 775 XMM31 776 #endif 777 ); 778 779 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} ); 780 reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 781 782 // Class for pre evex double registers 783 reg_class double_reg_legacy(XMM0, XMM0b, 784 XMM1, XMM1b, 785 XMM2, XMM2b, 786 XMM3, XMM3b, 787 XMM4, XMM4b, 788 XMM5, XMM5b, 789 XMM6, XMM6b, 790 XMM7, XMM7b 791 #ifdef _LP64 792 ,XMM8, XMM8b, 793 XMM9, XMM9b, 794 XMM10, XMM10b, 795 XMM11, XMM11b, 796 XMM12, XMM12b, 797 XMM13, XMM13b, 798 XMM14, XMM14b, 799 XMM15, XMM15b 800 #endif 801 ); 802 803 // Class for evex double registers 804 reg_class double_reg_evex(XMM0, XMM0b, 805 XMM1, XMM1b, 806 XMM2, XMM2b, 807 XMM3, XMM3b, 808 XMM4, XMM4b, 809 XMM5, XMM5b, 810 XMM6, XMM6b, 811 XMM7, XMM7b 812 #ifdef _LP64 813 ,XMM8, XMM8b, 814 XMM9, XMM9b, 815 XMM10, XMM10b, 816 XMM11, XMM11b, 817 XMM12, XMM12b, 818 XMM13, XMM13b, 819 XMM14, XMM14b, 820 XMM15, XMM15b, 821 XMM16, XMM16b, 822 XMM17, XMM17b, 823 XMM18, XMM18b, 824 XMM19, XMM19b, 825 XMM20, XMM20b, 826 XMM21, XMM21b, 827 XMM22, XMM22b, 828 XMM23, XMM23b, 829 XMM24, XMM24b, 830 XMM25, XMM25b, 831 XMM26, XMM26b, 832 XMM27, XMM27b, 833 XMM28, XMM28b, 834 XMM29, XMM29b, 835 XMM30, XMM30b, 836 XMM31, XMM31b 837 #endif 838 ); 839 840 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} ); 841 reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 842 843 // Class for pre evex 32bit vector registers 844 reg_class vectors_reg_legacy(XMM0, 845 XMM1, 846 XMM2, 847 XMM3, 848 XMM4, 849 XMM5, 850 XMM6, 851 XMM7 852 #ifdef _LP64 853 ,XMM8, 854 XMM9, 855 XMM10, 856 XMM11, 857 XMM12, 858 XMM13, 859 XMM14, 860 XMM15 861 #endif 862 ); 863 864 // Class for evex 32bit vector registers 865 reg_class vectors_reg_evex(XMM0, 866 XMM1, 867 XMM2, 868 XMM3, 869 XMM4, 870 XMM5, 871 XMM6, 872 XMM7 873 #ifdef _LP64 874 ,XMM8, 875 XMM9, 876 XMM10, 877 XMM11, 878 XMM12, 879 XMM13, 880 XMM14, 881 XMM15, 882 XMM16, 883 XMM17, 884 XMM18, 885 XMM19, 886 XMM20, 887 XMM21, 888 XMM22, 889 XMM23, 890 XMM24, 891 XMM25, 892 XMM26, 893 XMM27, 894 XMM28, 895 XMM29, 896 XMM30, 897 XMM31 898 #endif 899 ); 900 901 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} ); 902 reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 903 904 // Class for all 64bit vector registers 905 reg_class vectord_reg_legacy(XMM0, XMM0b, 906 XMM1, XMM1b, 907 XMM2, XMM2b, 908 XMM3, XMM3b, 909 XMM4, XMM4b, 910 
XMM5, XMM5b, 911 XMM6, XMM6b, 912 XMM7, XMM7b 913 #ifdef _LP64 914 ,XMM8, XMM8b, 915 XMM9, XMM9b, 916 XMM10, XMM10b, 917 XMM11, XMM11b, 918 XMM12, XMM12b, 919 XMM13, XMM13b, 920 XMM14, XMM14b, 921 XMM15, XMM15b 922 #endif 923 ); 924 925 // Class for all 64bit vector registers 926 reg_class vectord_reg_evex(XMM0, XMM0b, 927 XMM1, XMM1b, 928 XMM2, XMM2b, 929 XMM3, XMM3b, 930 XMM4, XMM4b, 931 XMM5, XMM5b, 932 XMM6, XMM6b, 933 XMM7, XMM7b 934 #ifdef _LP64 935 ,XMM8, XMM8b, 936 XMM9, XMM9b, 937 XMM10, XMM10b, 938 XMM11, XMM11b, 939 XMM12, XMM12b, 940 XMM13, XMM13b, 941 XMM14, XMM14b, 942 XMM15, XMM15b, 943 XMM16, XMM16b, 944 XMM17, XMM17b, 945 XMM18, XMM18b, 946 XMM19, XMM19b, 947 XMM20, XMM20b, 948 XMM21, XMM21b, 949 XMM22, XMM22b, 950 XMM23, XMM23b, 951 XMM24, XMM24b, 952 XMM25, XMM25b, 953 XMM26, XMM26b, 954 XMM27, XMM27b, 955 XMM28, XMM28b, 956 XMM29, XMM29b, 957 XMM30, XMM30b, 958 XMM31, XMM31b 959 #endif 960 ); 961 962 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} ); 963 reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 964 965 // Class for all 128bit vector registers 966 reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, 967 XMM1, XMM1b, XMM1c, XMM1d, 968 XMM2, XMM2b, XMM2c, XMM2d, 969 XMM3, XMM3b, XMM3c, XMM3d, 970 XMM4, XMM4b, XMM4c, XMM4d, 971 XMM5, XMM5b, XMM5c, XMM5d, 972 XMM6, XMM6b, XMM6c, XMM6d, 973 XMM7, XMM7b, XMM7c, XMM7d 974 #ifdef _LP64 975 ,XMM8, XMM8b, XMM8c, XMM8d, 976 XMM9, XMM9b, XMM9c, XMM9d, 977 XMM10, XMM10b, XMM10c, XMM10d, 978 XMM11, XMM11b, XMM11c, XMM11d, 979 XMM12, XMM12b, XMM12c, XMM12d, 980 XMM13, XMM13b, XMM13c, XMM13d, 981 XMM14, XMM14b, XMM14c, XMM14d, 982 XMM15, XMM15b, XMM15c, XMM15d 983 #endif 984 ); 985 986 // Class for all 128bit vector registers 987 reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, 988 XMM1, XMM1b, XMM1c, XMM1d, 989 XMM2, XMM2b, XMM2c, XMM2d, 990 XMM3, XMM3b, XMM3c, XMM3d, 991 XMM4, XMM4b, XMM4c, XMM4d, 992 XMM5, XMM5b, XMM5c, XMM5d, 993 XMM6, XMM6b, XMM6c, XMM6d, 994 XMM7, XMM7b, XMM7c, XMM7d 995 #ifdef _LP64 996 ,XMM8, XMM8b, XMM8c, XMM8d, 997 XMM9, XMM9b, XMM9c, XMM9d, 998 XMM10, XMM10b, XMM10c, XMM10d, 999 XMM11, XMM11b, XMM11c, XMM11d, 1000 XMM12, XMM12b, XMM12c, XMM12d, 1001 XMM13, XMM13b, XMM13c, XMM13d, 1002 XMM14, XMM14b, XMM14c, XMM14d, 1003 XMM15, XMM15b, XMM15c, XMM15d, 1004 XMM16, XMM16b, XMM16c, XMM16d, 1005 XMM17, XMM17b, XMM17c, XMM17d, 1006 XMM18, XMM18b, XMM18c, XMM18d, 1007 XMM19, XMM19b, XMM19c, XMM19d, 1008 XMM20, XMM20b, XMM20c, XMM20d, 1009 XMM21, XMM21b, XMM21c, XMM21d, 1010 XMM22, XMM22b, XMM22c, XMM22d, 1011 XMM23, XMM23b, XMM23c, XMM23d, 1012 XMM24, XMM24b, XMM24c, XMM24d, 1013 XMM25, XMM25b, XMM25c, XMM25d, 1014 XMM26, XMM26b, XMM26c, XMM26d, 1015 XMM27, XMM27b, XMM27c, XMM27d, 1016 XMM28, XMM28b, XMM28c, XMM28d, 1017 XMM29, XMM29b, XMM29c, XMM29d, 1018 XMM30, XMM30b, XMM30c, XMM30d, 1019 XMM31, XMM31b, XMM31c, XMM31d 1020 #endif 1021 ); 1022 1023 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} ); 1024 reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1025 1026 // Class for all 256bit vector registers 1027 reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1028 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1029 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1030 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, 
XMM3g, XMM3h, 1031 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1032 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1033 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1034 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1035 #ifdef _LP64 1036 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1037 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1038 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1039 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1040 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1041 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1042 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1043 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h 1044 #endif 1045 ); 1046 1047 // Class for all 256bit vector registers 1048 reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, 1049 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, 1050 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, 1051 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, 1052 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, 1053 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, 1054 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, 1055 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h 1056 #ifdef _LP64 1057 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, 1058 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, 1059 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, 1060 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, 1061 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, 1062 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, 1063 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, 1064 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, 1065 XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, 1066 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, 1067 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, 1068 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, 1069 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, 1070 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, 1071 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, 1072 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, 1073 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, 1074 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, 1075 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, 1076 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, 1077 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, 1078 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 1079 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, 1080 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h 1081 #endif 1082 ); 1083 1084 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} ); 1085 reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} ); 1086 1087 // Class for all 512bit vector registers 1088 reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1089 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, 
XMM1m, XMM1n, XMM1o, XMM1p, 1090 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1091 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1092 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1093 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1094 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1095 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1096 #ifdef _LP64 1097 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1098 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1099 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1100 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1101 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1102 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1103 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1104 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1105 ,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p, 1106 XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p, 1107 XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p, 1108 XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p, 1109 XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p, 1110 XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p, 1111 XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p, 1112 XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p, 1113 XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p, 1114 XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p, 1115 XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p, 1116 XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p, 1117 XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p, 1118 XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, 
XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p, 1119 XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p, 1120 XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p 1121 #endif 1122 ); 1123 1124 // Class for restricted 512bit vector registers 1125 reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p, 1126 XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p, 1127 XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p, 1128 XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p, 1129 XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p, 1130 XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p, 1131 XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p, 1132 XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p 1133 #ifdef _LP64 1134 ,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p, 1135 XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p, 1136 XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p, 1137 XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p, 1138 XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p, 1139 XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p, 1140 XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p, 1141 XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p 1142 #endif 1143 ); 1144 1145 reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} ); 1146 reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} ); 1147 1148 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d); 1149 %} 1150 1151 1152 //----------SOURCE BLOCK------------------------------------------------------- 1153 // This is a block of C++ code which provides values, functions, and 1154 // definitions necessary in the rest of the architecture description 1155 1156 source_hpp %{ 1157 // Header information of the source block. 1158 // Method declarations/definitions which are used outside 1159 // the ad-scope can conveniently be defined here. 1160 // 1161 // To keep related declarations/definitions/uses close together, 1162 // we switch between source %{ }% and source_hpp %{ }% freely as needed. 
1163 1164 #include "runtime/vm_version.hpp" 1165 1166 class NativeJump; 1167 1168 class CallStubImpl { 1169 1170 //-------------------------------------------------------------- 1171 //---< Used for optimization in Compile::shorten_branches >--- 1172 //-------------------------------------------------------------- 1173 1174 public: 1175 // Size of call trampoline stub. 1176 static uint size_call_trampoline() { 1177 return 0; // no call trampolines on this platform 1178 } 1179 1180 // number of relocations needed by a call trampoline stub 1181 static uint reloc_call_trampoline() { 1182 return 0; // no call trampolines on this platform 1183 } 1184 }; 1185 1186 class HandlerImpl { 1187 1188 public: 1189 1190 static int emit_exception_handler(CodeBuffer &cbuf); 1191 static int emit_deopt_handler(CodeBuffer& cbuf); 1192 1193 static uint size_exception_handler() { 1194 // NativeCall instruction size is the same as NativeJump. 1195 // exception handler starts out as jump and can be patched to 1196 // a call be deoptimization. (4932387) 1197 // Note that this value is also credited (in output.cpp) to 1198 // the size of the code section. 1199 return NativeJump::instruction_size; 1200 } 1201 1202 #ifdef _LP64 1203 static uint size_deopt_handler() { 1204 // three 5 byte instructions plus one move for unreachable address. 1205 return 15+3; 1206 } 1207 #else 1208 static uint size_deopt_handler() { 1209 // NativeCall instruction size is the same as NativeJump. 1210 // exception handler starts out as jump and can be patched to 1211 // a call be deoptimization. (4932387) 1212 // Note that this value is also credited (in output.cpp) to 1213 // the size of the code section. 1214 return 5 + NativeJump::instruction_size; // pushl(); jmp; 1215 } 1216 #endif 1217 }; 1218 1219 inline Assembler::AvxVectorLen vector_length_encoding(int bytes) { 1220 switch(bytes) { 1221 case 4: // fall-through 1222 case 8: // fall-through 1223 case 16: return Assembler::AVX_128bit; 1224 case 32: return Assembler::AVX_256bit; 1225 case 64: return Assembler::AVX_512bit; 1226 1227 default: { 1228 ShouldNotReachHere(); 1229 return Assembler::AVX_NoVec; 1230 } 1231 } 1232 } 1233 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) { 1235 return vector_length_encoding(Matcher::vector_length_in_bytes(n)); 1236 } 1237 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) { 1239 uint def_idx = use->operand_index(opnd); 1240 Node* def = use->in(def_idx); 1241 return vector_length_encoding(def); 1242 } 1243 1244 static inline bool is_unsigned_booltest_pred(int bt) { 1245 return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare); 1246 } 1247 1248 class Node::PD { 1249 public: 1250 enum NodeFlags { 1251 Flag_intel_jcc_erratum = Node::_last_flag << 1, 1252 _last_flag = Flag_intel_jcc_erratum 1253 }; 1254 }; 1255 1256 %} // end source_hpp 1257 1258 source %{ 1259 1260 #include "opto/addnode.hpp" 1261 #include "c2_intelJccErratum_x86.hpp" 1262 1263 void PhaseOutput::pd_perform_mach_node_analysis() { 1264 if (VM_Version::has_intel_jcc_erratum()) { 1265 int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc()); 1266 _buf_sizes._code += extra_padding; 1267 } 1268 } 1269 1270 int MachNode::pd_alignment_required() const { 1271 if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(this)) { 1272 // Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86. 
1273 return IntelJccErratum::largest_jcc_size() + 1; 1274 } else { 1275 return 1; 1276 } 1277 } 1278 1279 int MachNode::compute_padding(int current_offset) const { 1280 if (flags() & Node::PD::Flag_intel_jcc_erratum) { 1281 Compile* C = Compile::current(); 1282 PhaseOutput* output = C->output(); 1283 Block* block = output->block(); 1284 int index = output->index(); 1285 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc()); 1286 } else { 1287 return 0; 1288 } 1289 } 1290 1291 // Emit exception handler code. 1292 // Stuff framesize into a register and call a VM stub routine. 1293 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { 1294 1295 // Note that the code buffer's insts_mark is always relative to insts. 1296 // That's why we must use the macroassembler to generate a handler. 1297 C2_MacroAssembler _masm(&cbuf); 1298 address base = __ start_a_stub(size_exception_handler()); 1299 if (base == NULL) { 1300 ciEnv::current()->record_failure("CodeCache is full"); 1301 return 0; // CodeBuffer::expand failed 1302 } 1303 int offset = __ offset(); 1304 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); 1305 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); 1306 __ end_a_stub(); 1307 return offset; 1308 } 1309 1310 // Emit deopt handler code. 1311 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { 1312 1313 // Note that the code buffer's insts_mark is always relative to insts. 1314 // That's why we must use the macroassembler to generate a handler. 1315 C2_MacroAssembler _masm(&cbuf); 1316 address base = __ start_a_stub(size_deopt_handler()); 1317 if (base == NULL) { 1318 ciEnv::current()->record_failure("CodeCache is full"); 1319 return 0; // CodeBuffer::expand failed 1320 } 1321 int offset = __ offset(); 1322 1323 #ifdef _LP64 1324 address the_pc = (address) __ pc(); 1325 Label next; 1326 // push a "the_pc" on the stack without destroying any registers 1327 // as they all may be live. 1328 1329 // push address of "next" 1330 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32 1331 __ bind(next); 1332 // adjust it so it matches "the_pc" 1333 __ subptr(Address(rsp, 0), __ offset() - offset); 1334 #else 1335 InternalAddress here(__ pc()); 1336 __ pushptr(here.addr()); 1337 #endif 1338 1339 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); 1340 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset)); 1341 __ end_a_stub(); 1342 return offset; 1343 } 1344 1345 Assembler::Width widthForType(BasicType bt) { 1346 if (bt == T_BYTE) { 1347 return Assembler::B; 1348 } else if (bt == T_SHORT) { 1349 return Assembler::W; 1350 } else if (bt == T_INT) { 1351 return Assembler::D; 1352 } else { 1353 assert(bt == T_LONG, "not a long: %s", type2name(bt)); 1354 return Assembler::Q; 1355 } 1356 } 1357 1358 //============================================================================= 1359 1360 // Float masks come from different places depending on platform. 
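// On 64-bit builds they come from StubRoutines::x86 stub entries; the 32-bit
// build points at the *_signmask_pool/*_signflip_pool constants instead, as
// the accessors directly below show.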
1361 #ifdef _LP64 1362 static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } 1363 static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } 1364 static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } 1365 static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } 1366 #else 1367 static address float_signmask() { return (address)float_signmask_pool; } 1368 static address float_signflip() { return (address)float_signflip_pool; } 1369 static address double_signmask() { return (address)double_signmask_pool; } 1370 static address double_signflip() { return (address)double_signflip_pool; } 1371 #endif 1372 static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); } 1373 static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); } 1374 static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); } 1375 static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); } 1376 static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); } 1377 static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); } 1378 static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); } 1379 static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); } 1380 static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); } 1381 static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); } 1382 static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); } 1383 static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); } 1384 static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); } 1385 static address vector_float_signflip() { return StubRoutines::x86::vector_float_sign_flip();} 1386 static address vector_double_signflip() { return StubRoutines::x86::vector_double_sign_flip();} 1387 1388 //============================================================================= 1389 const bool Matcher::match_rule_supported(int opcode) { 1390 if (!has_match_rule(opcode)) { 1391 return false; // no match rule present 1392 } 1393 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1394 switch (opcode) { 1395 case Op_AbsVL: 1396 case Op_StoreVectorScatter: 1397 if (UseAVX < 3) { 1398 return false; 1399 } 1400 break; 1401 case Op_PopCountI: 1402 case Op_PopCountL: 1403 if (!UsePopCountInstruction) { 1404 return false; 1405 } 1406 break; 1407 case Op_PopCountVI: 1408 if (!UsePopCountInstruction || (UseAVX < 2)) { 1409 return false; 1410 } 1411 break; 1412 case Op_PopCountVL: 1413 if (!UsePopCountInstruction || (UseAVX <= 2)) { 1414 return false; 1415 } 1416 break; 1417 case Op_MulVI: 1418 if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX 1419 return false; 1420 } 1421 break; 1422 case Op_MulVL: 1423 if (UseSSE < 4) { // only with SSE4_1 or AVX 1424 return false; 1425 } 1426 break; 1427 case Op_MulReductionVL: 1428 if (VM_Version::supports_avx512dq() == false) { 1429 return false; 1430 } 1431 break; 1432 case Op_AddReductionVL: 1433 if (UseSSE < 2) { // requires at least SSE2 1434 return false; 1435 } 1436 break; 1437 case Op_AbsVB: 1438 case Op_AbsVS: 1439 case Op_AbsVI: 1440 case 
Op_AddReductionVI: 1441 case Op_AndReductionV: 1442 case Op_OrReductionV: 1443 case Op_XorReductionV: 1444 if (UseSSE < 3) { // requires at least SSSE3 1445 return false; 1446 } 1447 break; 1448 case Op_VectorLoadShuffle: 1449 case Op_VectorRearrange: 1450 case Op_MulReductionVI: 1451 if (UseSSE < 4) { // requires at least SSE4 1452 return false; 1453 } 1454 break; 1455 case Op_SqrtVD: 1456 case Op_SqrtVF: 1457 case Op_VectorMaskCmp: 1458 case Op_VectorCastB2X: 1459 case Op_VectorCastS2X: 1460 case Op_VectorCastI2X: 1461 case Op_VectorCastL2X: 1462 case Op_VectorCastF2X: 1463 case Op_VectorCastD2X: 1464 case Op_VectorUCastB2X: 1465 case Op_VectorUCastS2X: 1466 case Op_VectorUCastI2X: 1467 if (UseAVX < 1) { // enabled for AVX only 1468 return false; 1469 } 1470 break; 1471 case Op_RoundVF: 1472 if (UseAVX < 2) { // enabled for AVX2 only 1473 return false; 1474 } 1475 break; 1476 case Op_RoundVD: 1477 if (UseAVX < 3) { 1478 return false; // enabled for AVX3 only 1479 } 1480 break; 1481 case Op_CompareAndSwapL: 1482 #ifdef _LP64 1483 case Op_CompareAndSwapP: 1484 #endif 1485 if (!VM_Version::supports_cx8()) { 1486 return false; 1487 } 1488 break; 1489 case Op_CMoveVF: 1490 case Op_CMoveVD: 1491 if (UseAVX < 1) { // enabled for AVX only 1492 return false; 1493 } 1494 break; 1495 case Op_StrIndexOf: 1496 if (!UseSSE42Intrinsics) { 1497 return false; 1498 } 1499 break; 1500 case Op_StrIndexOfChar: 1501 if (!UseSSE42Intrinsics) { 1502 return false; 1503 } 1504 break; 1505 case Op_OnSpinWait: 1506 if (VM_Version::supports_on_spin_wait() == false) { 1507 return false; 1508 } 1509 break; 1510 case Op_MulVB: 1511 case Op_LShiftVB: 1512 case Op_RShiftVB: 1513 case Op_URShiftVB: 1514 case Op_VectorInsert: 1515 case Op_VectorLoadMask: 1516 case Op_VectorStoreMask: 1517 case Op_VectorBlend: 1518 if (UseSSE < 4) { 1519 return false; 1520 } 1521 break; 1522 #ifdef _LP64 1523 case Op_MaxD: 1524 case Op_MaxF: 1525 case Op_MinD: 1526 case Op_MinF: 1527 if (UseAVX < 1) { // enabled for AVX only 1528 return false; 1529 } 1530 break; 1531 #endif 1532 case Op_CacheWB: 1533 case Op_CacheWBPreSync: 1534 case Op_CacheWBPostSync: 1535 if (!VM_Version::supports_data_cache_line_flush()) { 1536 return false; 1537 } 1538 break; 1539 case Op_ExtractB: 1540 case Op_ExtractL: 1541 case Op_ExtractI: 1542 case Op_RoundDoubleMode: 1543 if (UseSSE < 4) { 1544 return false; 1545 } 1546 break; 1547 case Op_RoundDoubleModeV: 1548 if (VM_Version::supports_avx() == false) { 1549 return false; // 128bit vroundpd is not available 1550 } 1551 break; 1552 case Op_LoadVectorGather: 1553 if (UseAVX < 2) { 1554 return false; 1555 } 1556 break; 1557 case Op_FmaVD: 1558 case Op_FmaVF: 1559 if (!UseFMA) { 1560 return false; 1561 } 1562 break; 1563 case Op_MacroLogicV: 1564 if (UseAVX < 3 || !UseVectorMacroLogic) { 1565 return false; 1566 } 1567 break; 1568 1569 case Op_VectorCmpMasked: 1570 case Op_VectorMaskGen: 1571 case Op_LoadVectorMasked: 1572 case Op_StoreVectorMasked: 1573 if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) { 1574 return false; 1575 } 1576 break; 1577 case Op_VectorMaskFirstTrue: 1578 case Op_VectorMaskLastTrue: 1579 case Op_VectorMaskTrueCount: 1580 case Op_VectorMaskToLong: 1581 if (!is_LP64 || UseAVX < 1) { 1582 return false; 1583 } 1584 break; 1585 case Op_RoundF: 1586 case Op_RoundD: 1587 if (!is_LP64) { 1588 return false; 1589 } 1590 break; 1591 case Op_CopySignD: 1592 case Op_CopySignF: 1593 if (UseAVX < 3 || !is_LP64) { 1594 return false; 1595 } 1596 if (!VM_Version::supports_avx512vl()) { 1597 return 
false; 1598 } 1599 break; 1600 #ifndef _LP64 1601 case Op_AddReductionVF: 1602 case Op_AddReductionVD: 1603 case Op_MulReductionVF: 1604 case Op_MulReductionVD: 1605 if (UseSSE < 1) { // requires at least SSE 1606 return false; 1607 } 1608 break; 1609 case Op_MulAddVS2VI: 1610 case Op_RShiftVL: 1611 case Op_AbsVD: 1612 case Op_NegVD: 1613 if (UseSSE < 2) { 1614 return false; 1615 } 1616 break; 1617 #endif // !LP64 1618 case Op_SignumF: 1619 if (UseSSE < 1) { 1620 return false; 1621 } 1622 break; 1623 case Op_SignumD: 1624 if (UseSSE < 2) { 1625 return false; 1626 } 1627 break; 1628 case Op_SqrtF: 1629 if (UseSSE < 1) { 1630 return false; 1631 } 1632 break; 1633 case Op_SqrtD: 1634 #ifdef _LP64 1635 if (UseSSE < 2) { 1636 return false; 1637 } 1638 #else 1639 // x86_32.ad has a special match rule for SqrtD. 1640 // Together with common x86 rules, this handles all UseSSE cases. 1641 #endif 1642 break; 1643 } 1644 return true; // Match rules are supported by default. 1645 } 1646 1647 //------------------------------------------------------------------------ 1648 1649 // Identify extra cases that we might want to provide match rules for vector nodes and 1650 // other intrinsics guarded with vector length (vlen) and element type (bt). 1651 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) { 1652 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1653 if (!match_rule_supported(opcode)) { 1654 return false; 1655 } 1656 // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes): 1657 // * SSE2 supports 128bit vectors for all types; 1658 // * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types; 1659 // * AVX2 supports 256bit vectors for all types; 1660 // * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types; 1661 // * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types. 1662 // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE). 1663 // And MaxVectorSize is taken into account as well. 
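// For instance, a 256-bit vector of longs (bt == T_LONG, vlen == 4) is only
// accepted once AVX2 is available, since AVX1 restricts 256-bit vectors to
// FLOAT/DOUBLE; likewise, with AVX512F but no AVX512BW, 512-bit BYTE/SHORT/CHAR
// shapes are rejected.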
1664 if (!vector_size_supported(bt, vlen)) { 1665 return false; 1666 } 1667 // Special cases which require vector length follow: 1668 // * implementation limitations 1669 // * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ 1670 // * 128bit vroundpd instruction is present only in AVX1 1671 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1672 switch (opcode) { 1673 case Op_AbsVF: 1674 case Op_NegVF: 1675 if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) { 1676 return false; // 512bit vandps and vxorps are not available 1677 } 1678 break; 1679 case Op_AbsVD: 1680 case Op_NegVD: 1681 case Op_MulVL: 1682 if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) { 1683 return false; // 512bit vpmullq, vandpd and vxorpd are not available 1684 } 1685 break; 1686 case Op_CMoveVF: 1687 if (vlen != 8) { 1688 return false; // implementation limitation (only vcmov8F_reg is present) 1689 } 1690 break; 1691 case Op_RotateRightV: 1692 case Op_RotateLeftV: 1693 if (bt != T_INT && bt != T_LONG) { 1694 return false; 1695 } // fallthrough 1696 case Op_MacroLogicV: 1697 if (!VM_Version::supports_evex() || 1698 ((size_in_bits != 512) && !VM_Version::supports_avx512vl())) { 1699 return false; 1700 } 1701 break; 1702 case Op_ClearArray: 1703 case Op_VectorMaskGen: 1704 case Op_VectorCmpMasked: 1705 case Op_LoadVectorMasked: 1706 case Op_StoreVectorMasked: 1707 if (!is_LP64 || !VM_Version::supports_avx512bw()) { 1708 return false; 1709 } 1710 if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) { 1711 return false; 1712 } 1713 break; 1714 case Op_CMoveVD: 1715 if (vlen != 4) { 1716 return false; // implementation limitation (only vcmov4D_reg is present) 1717 } 1718 break; 1719 case Op_MaxV: 1720 case Op_MinV: 1721 if (UseSSE < 4 && is_integral_type(bt)) { 1722 return false; 1723 } 1724 if ((bt == T_FLOAT || bt == T_DOUBLE)) { 1725 // Float/Double intrinsics are enabled for AVX family currently. 1726 if (UseAVX == 0) { 1727 return false; 1728 } 1729 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { // 512 bit Float/Double intrinsics need AVX512DQ 1730 return false; 1731 } 1732 } 1733 break; 1734 case Op_CallLeafVector: 1735 if (size_in_bits == 512 && !VM_Version::supports_avx512vlbwdq()) { 1736 return false; 1737 } 1738 break; 1739 case Op_AddReductionVI: 1740 if (bt == T_INT && (UseSSE < 3 || !VM_Version::supports_ssse3())) { 1741 return false; 1742 } 1743 // fallthrough 1744 case Op_AndReductionV: 1745 case Op_OrReductionV: 1746 case Op_XorReductionV: 1747 if (is_subword_type(bt) && (UseSSE < 4)) { 1748 return false; 1749 } 1750 #ifndef _LP64 1751 if (bt == T_BYTE || bt == T_LONG) { 1752 return false; 1753 } 1754 #endif 1755 break; 1756 #ifndef _LP64 1757 case Op_VectorInsert: 1758 if (bt == T_LONG || bt == T_DOUBLE) { 1759 return false; 1760 } 1761 break; 1762 #endif 1763 case Op_MinReductionV: 1764 case Op_MaxReductionV: 1765 if ((bt == T_INT || is_subword_type(bt)) && UseSSE < 4) { 1766 return false; 1767 } else if (bt == T_LONG && (UseAVX < 3 || !VM_Version::supports_avx512vlbwdq())) { 1768 return false; 1769 } 1770 // Float/Double intrinsics enabled for AVX family. 
1771 if (UseAVX == 0 && (bt == T_FLOAT || bt == T_DOUBLE)) { 1772 return false; 1773 } 1774 if (UseAVX > 2 && (!VM_Version::supports_avx512dq() && size_in_bits == 512)) { 1775 return false; 1776 } 1777 #ifndef _LP64 1778 if (bt == T_BYTE || bt == T_LONG) { 1779 return false; 1780 } 1781 #endif 1782 break; 1783 case Op_VectorTest: 1784 if (UseSSE < 4) { 1785 return false; // Implementation limitation 1786 } else if (size_in_bits < 32) { 1787 return false; // Implementation limitation 1788 } else if (size_in_bits == 512 && (VM_Version::supports_avx512bw() == false)) { 1789 return false; // Implementation limitation 1790 } 1791 break; 1792 case Op_VectorLoadShuffle: 1793 case Op_VectorRearrange: 1794 if(vlen == 2) { 1795 return false; // Implementation limitation due to how shuffle is loaded 1796 } else if (size_in_bits == 256 && UseAVX < 2) { 1797 return false; // Implementation limitation 1798 } else if (bt == T_BYTE && size_in_bits > 256 && !VM_Version::supports_avx512_vbmi()) { 1799 return false; // Implementation limitation 1800 } else if (bt == T_SHORT && size_in_bits > 256 && !VM_Version::supports_avx512bw()) { 1801 return false; // Implementation limitation 1802 } 1803 break; 1804 case Op_VectorLoadMask: 1805 if (size_in_bits == 256 && UseAVX < 2) { 1806 return false; // Implementation limitation 1807 } 1808 // fallthrough 1809 case Op_VectorStoreMask: 1810 if (vlen == 2) { 1811 return false; // Implementation limitation 1812 } 1813 break; 1814 case Op_VectorCastB2X: 1815 case Op_VectorCastS2X: 1816 case Op_VectorCastI2X: 1817 if (bt != T_DOUBLE && size_in_bits == 256 && UseAVX < 2) { 1818 return false; 1819 } 1820 break; 1821 case Op_VectorCastL2X: 1822 if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) { 1823 return false; 1824 } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) { 1825 return false; 1826 } 1827 break; 1828 case Op_VectorCastD2X: 1829 if (is_subword_type(bt) || bt == T_INT) { 1830 return false; 1831 } 1832 if (bt == T_LONG && !VM_Version::supports_avx512dq()) { 1833 return false; 1834 } 1835 break; 1836 case Op_RoundVD: 1837 if (!VM_Version::supports_avx512dq()) { 1838 return false; 1839 } 1840 break; 1841 case Op_VectorCastF2X: 1842 if (is_subword_type(bt) || bt == T_LONG) { 1843 return false; 1844 } 1845 break; 1846 case Op_MulReductionVI: 1847 if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) { 1848 return false; 1849 } 1850 break; 1851 case Op_LoadVectorGatherMasked: 1852 case Op_StoreVectorScatterMasked: 1853 case Op_StoreVectorScatter: 1854 if(is_subword_type(bt)) { 1855 return false; 1856 } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1857 return false; 1858 } 1859 // fallthrough 1860 case Op_LoadVectorGather: 1861 if (size_in_bits == 64 ) { 1862 return false; 1863 } 1864 break; 1865 case Op_MaskAll: 1866 if (!VM_Version::supports_evex()) { 1867 return false; 1868 } 1869 if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) { 1870 return false; 1871 } 1872 if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) { 1873 return false; 1874 } 1875 break; 1876 case Op_VectorMaskCmp: 1877 if (vlen < 2 || size_in_bits < 32) { 1878 return false; 1879 } 1880 break; 1881 case Op_VectorLongToMask: 1882 if (UseAVX < 1 || !is_LP64) { 1883 return false; 1884 } 1885 if (UseAVX < 3 && !VM_Version::supports_bmi2()) { 1886 return false; 1887 } 1888 break; 1889 case Op_PopCountVI: 1890 if (!VM_Version::supports_avx512_vpopcntdq() && 1891 (vlen == 16) && 
!VM_Version::supports_avx512bw()) { 1892 return false; 1893 } 1894 break; 1895 case Op_PopCountVL: 1896 if (!VM_Version::supports_avx512_vpopcntdq() && 1897 ((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) { 1898 return false; 1899 } 1900 break; 1901 } 1902 return true; // Per default match rules are supported. 1903 } 1904 1905 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) { 1906 // ADLC based match_rule_supported routine checks for the existence of pattern based 1907 // on IR opcode. Most of the unary/binary/ternary masked operation share the IR nodes 1908 // of their non-masked counterpart with mask edge being the differentiator. 1909 // This routine does a strict check on the existence of masked operation patterns 1910 // by returning a default false value for all the other opcodes apart from the 1911 // ones whose masked instruction patterns are defined in this file. 1912 if (!match_rule_supported_vector(opcode, vlen, bt)) { 1913 return false; 1914 } 1915 1916 const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false); 1917 int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte; 1918 if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) { 1919 return false; 1920 } 1921 switch(opcode) { 1922 // Unary masked operations 1923 case Op_AbsVB: 1924 case Op_AbsVS: 1925 if(!VM_Version::supports_avx512bw()) { 1926 return false; // Implementation limitation 1927 } 1928 case Op_AbsVI: 1929 case Op_AbsVL: 1930 return true; 1931 1932 // Ternary masked operations 1933 case Op_FmaVF: 1934 case Op_FmaVD: 1935 return true; 1936 1937 case Op_MacroLogicV: 1938 if(bt != T_INT && bt != T_LONG) { 1939 return false; 1940 } 1941 return true; 1942 1943 // Binary masked operations 1944 case Op_AddVB: 1945 case Op_AddVS: 1946 case Op_SubVB: 1947 case Op_SubVS: 1948 case Op_MulVS: 1949 case Op_LShiftVS: 1950 case Op_RShiftVS: 1951 case Op_URShiftVS: 1952 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1953 if (!VM_Version::supports_avx512bw()) { 1954 return false; // Implementation limitation 1955 } 1956 return true; 1957 1958 case Op_MulVL: 1959 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1960 if (!VM_Version::supports_avx512dq()) { 1961 return false; // Implementation limitation 1962 } 1963 return true; 1964 1965 case Op_AndV: 1966 case Op_OrV: 1967 case Op_XorV: 1968 case Op_RotateRightV: 1969 case Op_RotateLeftV: 1970 if (bt != T_INT && bt != T_LONG) { 1971 return false; // Implementation limitation 1972 } 1973 return true; 1974 1975 case Op_VectorLoadMask: 1976 assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), ""); 1977 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 1978 return false; 1979 } 1980 return true; 1981 1982 case Op_AddVI: 1983 case Op_AddVL: 1984 case Op_AddVF: 1985 case Op_AddVD: 1986 case Op_SubVI: 1987 case Op_SubVL: 1988 case Op_SubVF: 1989 case Op_SubVD: 1990 case Op_MulVI: 1991 case Op_MulVF: 1992 case Op_MulVD: 1993 case Op_DivVF: 1994 case Op_DivVD: 1995 case Op_SqrtVF: 1996 case Op_SqrtVD: 1997 case Op_LShiftVI: 1998 case Op_LShiftVL: 1999 case Op_RShiftVI: 2000 case Op_RShiftVL: 2001 case Op_URShiftVI: 2002 case Op_URShiftVL: 2003 case Op_LoadVectorMasked: 2004 case Op_StoreVectorMasked: 2005 case Op_LoadVectorGatherMasked: 2006 case Op_StoreVectorScatterMasked: 2007 return true; 2008 2009 case Op_MaxV: 2010 case Op_MinV: 2011 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2012 return false; // Implementation limitation 2013 } 2014 
if (is_floating_point_type(bt)) { 2015 return false; // Implementation limitation 2016 } 2017 return true; 2018 2019 case Op_VectorMaskCmp: 2020 if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) { 2021 return false; // Implementation limitation 2022 } 2023 return true; 2024 2025 case Op_VectorRearrange: 2026 if (bt == T_SHORT && !VM_Version::supports_avx512bw()) { 2027 return false; // Implementation limitation 2028 } 2029 if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) { 2030 return false; // Implementation limitation 2031 } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) { 2032 return false; // Implementation limitation 2033 } 2034 return true; 2035 2036 // Binary Logical operations 2037 case Op_AndVMask: 2038 case Op_OrVMask: 2039 case Op_XorVMask: 2040 if (vlen > 16 && !VM_Version::supports_avx512bw()) { 2041 return false; // Implementation limitation 2042 } 2043 return true; 2044 2045 case Op_MaskAll: 2046 return true; 2047 2048 default: 2049 return false; 2050 } 2051 } 2052 2053 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) { 2054 assert(Matcher::is_generic_vector(generic_opnd), "not generic"); 2055 bool legacy = (generic_opnd->opcode() == LEGVEC); 2056 if (!VM_Version::supports_avx512vlbwdq() && // KNL 2057 is_temp && !legacy && (ideal_reg == Op_VecZ)) { 2058 // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL. 2059 return new legVecZOper(); 2060 } 2061 if (legacy) { 2062 switch (ideal_reg) { 2063 case Op_VecS: return new legVecSOper(); 2064 case Op_VecD: return new legVecDOper(); 2065 case Op_VecX: return new legVecXOper(); 2066 case Op_VecY: return new legVecYOper(); 2067 case Op_VecZ: return new legVecZOper(); 2068 } 2069 } else { 2070 switch (ideal_reg) { 2071 case Op_VecS: return new vecSOper(); 2072 case Op_VecD: return new vecDOper(); 2073 case Op_VecX: return new vecXOper(); 2074 case Op_VecY: return new vecYOper(); 2075 case Op_VecZ: return new vecZOper(); 2076 } 2077 } 2078 ShouldNotReachHere(); 2079 return NULL; 2080 } 2081 2082 bool Matcher::is_reg2reg_move(MachNode* m) { 2083 switch (m->rule()) { 2084 case MoveVec2Leg_rule: 2085 case MoveLeg2Vec_rule: 2086 case MoveF2VL_rule: 2087 case MoveF2LEG_rule: 2088 case MoveVL2F_rule: 2089 case MoveLEG2F_rule: 2090 case MoveD2VL_rule: 2091 case MoveD2LEG_rule: 2092 case MoveVL2D_rule: 2093 case MoveLEG2D_rule: 2094 return true; 2095 default: 2096 return false; 2097 } 2098 } 2099 2100 bool Matcher::is_generic_vector(MachOper* opnd) { 2101 switch (opnd->opcode()) { 2102 case VEC: 2103 case LEGVEC: 2104 return true; 2105 default: 2106 return false; 2107 } 2108 } 2109 2110 //------------------------------------------------------------------------ 2111 2112 const RegMask* Matcher::predicate_reg_mask(void) { 2113 return &_VECTMASK_REG_mask; 2114 } 2115 2116 const TypeVectMask* Matcher::predicate_reg_type(const Type* elemTy, int length) { 2117 return new TypeVectMask(elemTy, length); 2118 } 2119 2120 // Max vector size in bytes. 0 if not supported. 2121 const int Matcher::vector_width_in_bytes(BasicType bt) { 2122 assert(is_java_primitive(bt), "only primitive type vectors"); 2123 if (UseSSE < 2) return 0; 2124 // SSE2 supports 128bit vectors for all types. 2125 // AVX2 supports 256bit vectors for all types. 2126 // AVX2/EVEX supports 512bit vectors for all types. 2127 int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; 2128 // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. 
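// E.g. UseAVX == 2 yields (1 << 2) * 8 == 32 bytes and UseAVX == 3 yields
// 64 bytes, while SSE2/AVX1 start at 16 bytes; the adjustments below then
// widen FLOAT/DOUBLE on AVX1 to 32 bytes and cap subword types at 32 bytes
// when AVX512BW is absent.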
2129 if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) 2130 size = (UseAVX > 2) ? 64 : 32; 2131 if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) 2132 size = (VM_Version::supports_avx512bw()) ? 64 : 32; 2133 // Use flag to limit vector size. 2134 size = MIN2(size,(int)MaxVectorSize); 2135 // Minimum 2 values in vector (or 4 for bytes). 2136 switch (bt) { 2137 case T_DOUBLE: 2138 case T_LONG: 2139 if (size < 16) return 0; 2140 break; 2141 case T_FLOAT: 2142 case T_INT: 2143 if (size < 8) return 0; 2144 break; 2145 case T_BOOLEAN: 2146 if (size < 4) return 0; 2147 break; 2148 case T_CHAR: 2149 if (size < 4) return 0; 2150 break; 2151 case T_BYTE: 2152 if (size < 4) return 0; 2153 break; 2154 case T_SHORT: 2155 if (size < 4) return 0; 2156 break; 2157 default: 2158 ShouldNotReachHere(); 2159 } 2160 return size; 2161 } 2162 2163 // Limits on vector size (number of elements) loaded into vector. 2164 const int Matcher::max_vector_size(const BasicType bt) { 2165 return vector_width_in_bytes(bt)/type2aelembytes(bt); 2166 } 2167 const int Matcher::min_vector_size(const BasicType bt) { 2168 int max_size = max_vector_size(bt); 2169 // Min size which can be loaded into vector is 4 bytes. 2170 int size = (type2aelembytes(bt) == 1) ? 4 : 2; 2171 // Support for calling svml double64 vectors 2172 if (bt == T_DOUBLE) { 2173 size = 1; 2174 } 2175 return MIN2(size,max_size); 2176 } 2177 2178 const int Matcher::scalable_vector_reg_size(const BasicType bt) { 2179 return -1; 2180 } 2181 2182 // Vector ideal reg corresponding to specified size in bytes 2183 const uint Matcher::vector_ideal_reg(int size) { 2184 assert(MaxVectorSize >= size, ""); 2185 switch(size) { 2186 case 4: return Op_VecS; 2187 case 8: return Op_VecD; 2188 case 16: return Op_VecX; 2189 case 32: return Op_VecY; 2190 case 64: return Op_VecZ; 2191 } 2192 ShouldNotReachHere(); 2193 return 0; 2194 } 2195 2196 // Check for shift by small constant as well 2197 static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) { 2198 if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() && 2199 shift->in(2)->get_int() <= 3 && 2200 // Are there other uses besides address expressions? 2201 !matcher->is_visited(shift)) { 2202 address_visited.set(shift->_idx); // Flag as address_visited 2203 mstack.push(shift->in(2), Matcher::Visit); 2204 Node *conv = shift->in(1); 2205 #ifdef _LP64 2206 // Allow Matcher to match the rule which bypass 2207 // ConvI2L operation for an array index on LP64 2208 // if the index value is positive. 2209 if (conv->Opcode() == Op_ConvI2L && 2210 conv->as_Type()->type()->is_long()->_lo >= 0 && 2211 // Are there other uses besides address expressions? 2212 !matcher->is_visited(conv)) { 2213 address_visited.set(conv->_idx); // Flag as address_visited 2214 mstack.push(conv->in(1), Matcher::Pre_Visit); 2215 } else 2216 #endif 2217 mstack.push(conv, Matcher::Pre_Visit); 2218 return true; 2219 } 2220 return false; 2221 } 2222 2223 // This function identifies sub-graphs in which a 'load' node is 2224 // input to two different nodes, and such that it can be matched 2225 // with BMI instructions like blsi, blsr, etc. 2226 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32. 2227 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL* 2228 // refers to the same node. 2229 // 2230 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop) 2231 // This is a temporary solution until we make DAGs expressible in ADL. 
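//
// A rough sketch of how the matcher below is driven (see is_bmi_pattern()
// further down): to accept the (AndI (SubI 0 load) load) shape feeding blsi,
// the caller writes
//   FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
//   bmii.match(Op_AndI, -1, Op_SubI, 1, 0);
// where -1 marks AndI as commutative, 1 says the constant is input 1 of SubI,
// and 0 is the required constant value.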
2232 template<typename ConType> 2233 class FusedPatternMatcher { 2234 Node* _op1_node; 2235 Node* _mop_node; 2236 int _con_op; 2237 2238 static int match_next(Node* n, int next_op, int next_op_idx) { 2239 if (n->in(1) == NULL || n->in(2) == NULL) { 2240 return -1; 2241 } 2242 2243 if (next_op_idx == -1) { // n is commutative, try rotations 2244 if (n->in(1)->Opcode() == next_op) { 2245 return 1; 2246 } else if (n->in(2)->Opcode() == next_op) { 2247 return 2; 2248 } 2249 } else { 2250 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index"); 2251 if (n->in(next_op_idx)->Opcode() == next_op) { 2252 return next_op_idx; 2253 } 2254 } 2255 return -1; 2256 } 2257 2258 public: 2259 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) : 2260 _op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { } 2261 2262 bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative 2263 int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative 2264 typename ConType::NativeType con_value) { 2265 if (_op1_node->Opcode() != op1) { 2266 return false; 2267 } 2268 if (_mop_node->outcnt() > 2) { 2269 return false; 2270 } 2271 op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx); 2272 if (op1_op2_idx == -1) { 2273 return false; 2274 } 2275 // Memory operation must be the other edge 2276 int op1_mop_idx = (op1_op2_idx & 1) + 1; 2277 2278 // Check that the mop node is really what we want 2279 if (_op1_node->in(op1_mop_idx) == _mop_node) { 2280 Node* op2_node = _op1_node->in(op1_op2_idx); 2281 if (op2_node->outcnt() > 1) { 2282 return false; 2283 } 2284 assert(op2_node->Opcode() == op2, "Should be"); 2285 op2_con_idx = match_next(op2_node, _con_op, op2_con_idx); 2286 if (op2_con_idx == -1) { 2287 return false; 2288 } 2289 // Memory operation must be the other edge 2290 int op2_mop_idx = (op2_con_idx & 1) + 1; 2291 // Check that the memory operation is the same node 2292 if (op2_node->in(op2_mop_idx) == _mop_node) { 2293 // Now check the constant 2294 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type(); 2295 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) { 2296 return true; 2297 } 2298 } 2299 } 2300 return false; 2301 } 2302 }; 2303 2304 static bool is_bmi_pattern(Node* n, Node* m) { 2305 assert(UseBMI1Instructions, "sanity"); 2306 if (n != NULL && m != NULL) { 2307 if (m->Opcode() == Op_LoadI) { 2308 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI); 2309 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) || 2310 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) || 2311 bmii.match(Op_XorI, -1, Op_AddI, -1, -1); 2312 } else if (m->Opcode() == Op_LoadL) { 2313 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL); 2314 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) || 2315 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) || 2316 bmil.match(Op_XorL, -1, Op_AddL, -1, -1); 2317 } 2318 } 2319 return false; 2320 } 2321 2322 // Should the matcher clone input 'm' of node 'n'? 2323 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) { 2324 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'. 
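// Cloning lets the shared load be folded into the memory form of the fused
// instruction (e.g. blsi r32, m32) at the use instead of being forced into a
// register first.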
2325 if (UseBMI1Instructions && is_bmi_pattern(n, m)) { 2326 mstack.push(m, Visit); 2327 return true; 2328 } 2329 if (is_vshift_con_pattern(n, m)) { // ShiftV src (ShiftCntV con) 2330 mstack.push(m, Visit); // m = ShiftCntV 2331 return true; 2332 } 2333 return false; 2334 } 2335 2336 // Should the Matcher clone shifts on addressing modes, expecting them 2337 // to be subsumed into complex addressing expressions or compute them 2338 // into registers? 2339 bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { 2340 Node *off = m->in(AddPNode::Offset); 2341 if (off->is_Con()) { 2342 address_visited.test_set(m->_idx); // Flag as address_visited 2343 Node *adr = m->in(AddPNode::Address); 2344 2345 // Intel can handle 2 adds in addressing mode 2346 // AtomicAdd is not an addressing expression. 2347 // Cheap to find it by looking for screwy base. 2348 if (adr->is_AddP() && 2349 !adr->in(AddPNode::Base)->is_top() && 2350 LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32 2351 // Are there other uses besides address expressions? 2352 !is_visited(adr)) { 2353 address_visited.set(adr->_idx); // Flag as address_visited 2354 Node *shift = adr->in(AddPNode::Offset); 2355 if (!clone_shift(shift, this, mstack, address_visited)) { 2356 mstack.push(shift, Pre_Visit); 2357 } 2358 mstack.push(adr->in(AddPNode::Address), Pre_Visit); 2359 mstack.push(adr->in(AddPNode::Base), Pre_Visit); 2360 } else { 2361 mstack.push(adr, Pre_Visit); 2362 } 2363 2364 // Clone X+offset as it also folds into most addressing expressions 2365 mstack.push(off, Visit); 2366 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2367 return true; 2368 } else if (clone_shift(off, this, mstack, address_visited)) { 2369 address_visited.test_set(m->_idx); // Flag as address_visited 2370 mstack.push(m->in(AddPNode::Address), Pre_Visit); 2371 mstack.push(m->in(AddPNode::Base), Pre_Visit); 2372 return true; 2373 } 2374 return false; 2375 } 2376 2377 static inline Assembler::ComparisonPredicate booltest_pred_to_comparison_pred(int bt) { 2378 switch (bt) { 2379 case BoolTest::eq: 2380 return Assembler::eq; 2381 case BoolTest::ne: 2382 return Assembler::neq; 2383 case BoolTest::le: 2384 case BoolTest::ule: 2385 return Assembler::le; 2386 case BoolTest::ge: 2387 case BoolTest::uge: 2388 return Assembler::nlt; 2389 case BoolTest::lt: 2390 case BoolTest::ult: 2391 return Assembler::lt; 2392 case BoolTest::gt: 2393 case BoolTest::ugt: 2394 return Assembler::nle; 2395 default : ShouldNotReachHere(); return Assembler::_false; 2396 } 2397 } 2398 2399 static inline Assembler::ComparisonPredicateFP booltest_pred_to_comparison_pred_fp(int bt) { 2400 switch (bt) { 2401 case BoolTest::eq: return Assembler::EQ_OQ; // ordered non-signaling 2402 // As per JLS 15.21.1, != of NaNs is true. Thus use unordered compare. 2403 case BoolTest::ne: return Assembler::NEQ_UQ; // unordered non-signaling 2404 case BoolTest::le: return Assembler::LE_OQ; // ordered non-signaling 2405 case BoolTest::ge: return Assembler::GE_OQ; // ordered non-signaling 2406 case BoolTest::lt: return Assembler::LT_OQ; // ordered non-signaling 2407 case BoolTest::gt: return Assembler::GT_OQ; // ordered non-signaling 2408 default: ShouldNotReachHere(); return Assembler::FALSE_OS; 2409 } 2410 } 2411 2412 // Helper methods for MachSpillCopyNode::implementation(). 
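// vec_mov_helper() copies a vector between XMM registers and vec_spill_helper()
// loads/stores one to a stack slot; both select the instruction by ideal
// register kind (VecS..VecZ) and fall back to EVEX extract/insert forms when
// AVX-512 without AVX512VL is in use.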
2413 static void vec_mov_helper(CodeBuffer *cbuf, int src_lo, int dst_lo, 2414 int src_hi, int dst_hi, uint ireg, outputStream* st) { 2415 assert(ireg == Op_VecS || // 32bit vector 2416 (src_lo & 1) == 0 && (src_lo + 1) == src_hi && 2417 (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, 2418 "no non-adjacent vector moves" ); 2419 if (cbuf) { 2420 C2_MacroAssembler _masm(cbuf); 2421 switch (ireg) { 2422 case Op_VecS: // copy whole register 2423 case Op_VecD: 2424 case Op_VecX: 2425 #ifndef _LP64 2426 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2427 #else 2428 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2429 __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2430 } else { 2431 __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2432 } 2433 #endif 2434 break; 2435 case Op_VecY: 2436 #ifndef _LP64 2437 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2438 #else 2439 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2440 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); 2441 } else { 2442 __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); 2443 } 2444 #endif 2445 break; 2446 case Op_VecZ: 2447 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); 2448 break; 2449 default: 2450 ShouldNotReachHere(); 2451 } 2452 #ifndef PRODUCT 2453 } else { 2454 switch (ireg) { 2455 case Op_VecS: 2456 case Op_VecD: 2457 case Op_VecX: 2458 st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2459 break; 2460 case Op_VecY: 2461 case Op_VecZ: 2462 st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); 2463 break; 2464 default: 2465 ShouldNotReachHere(); 2466 } 2467 #endif 2468 } 2469 } 2470 2471 void vec_spill_helper(CodeBuffer *cbuf, bool is_load, 2472 int stack_offset, int reg, uint ireg, outputStream* st) { 2473 if (cbuf) { 2474 C2_MacroAssembler _masm(cbuf); 2475 if (is_load) { 2476 switch (ireg) { 2477 case Op_VecS: 2478 __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2479 break; 2480 case Op_VecD: 2481 __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2482 break; 2483 case Op_VecX: 2484 #ifndef _LP64 2485 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2486 #else 2487 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2488 __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2489 } else { 2490 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2491 __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2492 } 2493 #endif 2494 break; 2495 case Op_VecY: 2496 #ifndef _LP64 2497 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2498 #else 2499 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2500 __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); 2501 } else { 2502 __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); 
2503 __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); 2504 } 2505 #endif 2506 break; 2507 case Op_VecZ: 2508 __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); 2509 break; 2510 default: 2511 ShouldNotReachHere(); 2512 } 2513 } else { // store 2514 switch (ireg) { 2515 case Op_VecS: 2516 __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2517 break; 2518 case Op_VecD: 2519 __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2520 break; 2521 case Op_VecX: 2522 #ifndef _LP64 2523 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2524 #else 2525 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2526 __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2527 } 2528 else { 2529 __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2530 } 2531 #endif 2532 break; 2533 case Op_VecY: 2534 #ifndef _LP64 2535 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2536 #else 2537 if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { 2538 __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); 2539 } 2540 else { 2541 __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); 2542 } 2543 #endif 2544 break; 2545 case Op_VecZ: 2546 __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); 2547 break; 2548 default: 2549 ShouldNotReachHere(); 2550 } 2551 } 2552 #ifndef PRODUCT 2553 } else { 2554 if (is_load) { 2555 switch (ireg) { 2556 case Op_VecS: 2557 st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2558 break; 2559 case Op_VecD: 2560 st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2561 break; 2562 case Op_VecX: 2563 st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2564 break; 2565 case Op_VecY: 2566 case Op_VecZ: 2567 st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); 2568 break; 2569 default: 2570 ShouldNotReachHere(); 2571 } 2572 } else { // store 2573 switch (ireg) { 2574 case Op_VecS: 2575 st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2576 break; 2577 case Op_VecD: 2578 st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2579 break; 2580 case Op_VecX: 2581 st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2582 break; 2583 case Op_VecY: 2584 case Op_VecZ: 2585 st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); 2586 break; 2587 default: 2588 ShouldNotReachHere(); 2589 } 2590 } 2591 #endif 2592 } 2593 } 2594 2595 template <class T> 2596 static inline GrowableArray<jvalue>* vreplicate_imm(BasicType bt, T con, int len) { 2597 GrowableArray<jvalue>* val = new GrowableArray<jvalue>(len); 2598 jvalue ele; 2599 switch (bt) { 2600 case T_BYTE: ele.b = con; break; 2601 case T_SHORT: ele.s = con; break; 2602 case T_INT: ele.i = con; break; 2603 case T_LONG: ele.j = con; break; 2604 case T_FLOAT: ele.f = con; break; 2605 case T_DOUBLE: ele.d = con; break; 2606 default: ShouldNotReachHere(); 2607 } 2608 for (int i = 0; i < len; i++) { 2609 val->append(ele); 2610 } 2611 return val; 2612 } 2613 2614 static inline jlong high_bit_set(BasicType bt) { 2615 switch (bt) { 2616 case T_BYTE: 
return 0x8080808080808080; 2617 case T_SHORT: return 0x8000800080008000; 2618 case T_INT: return 0x8000000080000000; 2619 case T_LONG: return 0x8000000000000000; 2620 default: 2621 ShouldNotReachHere(); 2622 return 0; 2623 } 2624 } 2625 2626 #ifndef PRODUCT 2627 void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const { 2628 st->print("nop \t# %d bytes pad for loops and calls", _count); 2629 } 2630 #endif 2631 2632 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const { 2633 C2_MacroAssembler _masm(&cbuf); 2634 __ nop(_count); 2635 } 2636 2637 uint MachNopNode::size(PhaseRegAlloc*) const { 2638 return _count; 2639 } 2640 2641 #ifndef PRODUCT 2642 void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { 2643 st->print("# breakpoint"); 2644 } 2645 #endif 2646 2647 void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { 2648 C2_MacroAssembler _masm(&cbuf); 2649 __ int3(); 2650 } 2651 2652 uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { 2653 return MachNode::size(ra_); 2654 } 2655 2656 %} 2657 2658 encode %{ 2659 2660 enc_class call_epilog %{ 2661 if (VerifyStackAtCalls) { 2662 // Check that stack depth is unchanged: find majik cookie on stack 2663 int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word)); 2664 C2_MacroAssembler _masm(&cbuf); 2665 Label L; 2666 __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d); 2667 __ jccb(Assembler::equal, L); 2668 // Die if stack mismatch 2669 __ int3(); 2670 __ bind(L); 2671 } 2672 %} 2673 2674 %} 2675 2676 // Operands for bound floating pointer register arguments 2677 operand rxmm0() %{ 2678 constraint(ALLOC_IN_RC(xmm0_reg)); 2679 match(VecX); 2680 format%{%} 2681 interface(REG_INTER); 2682 %} 2683 2684 //----------OPERANDS----------------------------------------------------------- 2685 // Operand definitions must precede instruction definitions for correct parsing 2686 // in the ADLC because operands constitute user defined types which are used in 2687 // instruction definitions. 2688 2689 // Vectors 2690 2691 // Dummy generic vector class. Should be used for all vector operands. 2692 // Replaced with vec[SDXYZ] during post-selection pass. 2693 operand vec() %{ 2694 constraint(ALLOC_IN_RC(dynamic)); 2695 match(VecX); 2696 match(VecY); 2697 match(VecZ); 2698 match(VecS); 2699 match(VecD); 2700 2701 format %{ %} 2702 interface(REG_INTER); 2703 %} 2704 2705 // Dummy generic legacy vector class. Should be used for all legacy vector operands. 2706 // Replaced with legVec[SDXYZ] during post-selection cleanup. 2707 // Note: legacy register class is used to avoid extra (unneeded in 32-bit VM) 2708 // runtime code generation via reg_class_dynamic. 2709 operand legVec() %{ 2710 constraint(ALLOC_IN_RC(dynamic)); 2711 match(VecX); 2712 match(VecY); 2713 match(VecZ); 2714 match(VecS); 2715 match(VecD); 2716 2717 format %{ %} 2718 interface(REG_INTER); 2719 %} 2720 2721 // Replaces vec during post-selection cleanup. See above. 2722 operand vecS() %{ 2723 constraint(ALLOC_IN_RC(vectors_reg_vlbwdq)); 2724 match(VecS); 2725 2726 format %{ %} 2727 interface(REG_INTER); 2728 %} 2729 2730 // Replaces legVec during post-selection cleanup. See above. 2731 operand legVecS() %{ 2732 constraint(ALLOC_IN_RC(vectors_reg_legacy)); 2733 match(VecS); 2734 2735 format %{ %} 2736 interface(REG_INTER); 2737 %} 2738 2739 // Replaces vec during post-selection cleanup. See above. 
2740 operand vecD() %{ 2741 constraint(ALLOC_IN_RC(vectord_reg_vlbwdq)); 2742 match(VecD); 2743 2744 format %{ %} 2745 interface(REG_INTER); 2746 %} 2747 2748 // Replaces legVec during post-selection cleanup. See above. 2749 operand legVecD() %{ 2750 constraint(ALLOC_IN_RC(vectord_reg_legacy)); 2751 match(VecD); 2752 2753 format %{ %} 2754 interface(REG_INTER); 2755 %} 2756 2757 // Replaces vec during post-selection cleanup. See above. 2758 operand vecX() %{ 2759 constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq)); 2760 match(VecX); 2761 2762 format %{ %} 2763 interface(REG_INTER); 2764 %} 2765 2766 // Replaces legVec during post-selection cleanup. See above. 2767 operand legVecX() %{ 2768 constraint(ALLOC_IN_RC(vectorx_reg_legacy)); 2769 match(VecX); 2770 2771 format %{ %} 2772 interface(REG_INTER); 2773 %} 2774 2775 // Replaces vec during post-selection cleanup. See above. 2776 operand vecY() %{ 2777 constraint(ALLOC_IN_RC(vectory_reg_vlbwdq)); 2778 match(VecY); 2779 2780 format %{ %} 2781 interface(REG_INTER); 2782 %} 2783 2784 // Replaces legVec during post-selection cleanup. See above. 2785 operand legVecY() %{ 2786 constraint(ALLOC_IN_RC(vectory_reg_legacy)); 2787 match(VecY); 2788 2789 format %{ %} 2790 interface(REG_INTER); 2791 %} 2792 2793 // Replaces vec during post-selection cleanup. See above. 2794 operand vecZ() %{ 2795 constraint(ALLOC_IN_RC(vectorz_reg)); 2796 match(VecZ); 2797 2798 format %{ %} 2799 interface(REG_INTER); 2800 %} 2801 2802 // Replaces legVec during post-selection cleanup. See above. 2803 operand legVecZ() %{ 2804 constraint(ALLOC_IN_RC(vectorz_reg_legacy)); 2805 match(VecZ); 2806 2807 format %{ %} 2808 interface(REG_INTER); 2809 %} 2810 2811 // Comparison Code for FP conditional move 2812 operand cmpOp_vcmppd() %{ 2813 match(Bool); 2814 2815 predicate(n->as_Bool()->_test._test != BoolTest::overflow && 2816 n->as_Bool()->_test._test != BoolTest::no_overflow); 2817 format %{ "" %} 2818 interface(COND_INTER) %{ 2819 equal (0x0, "eq"); 2820 less (0x1, "lt"); 2821 less_equal (0x2, "le"); 2822 not_equal (0xC, "ne"); 2823 greater_equal(0xD, "ge"); 2824 greater (0xE, "gt"); 2825 //TODO cannot compile (adlc breaks) without two next lines with error: 2826 // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{ 2827 // equal' for overflow. 
2828 overflow (0x20, "o"); // not really supported by the instruction 2829 no_overflow (0x21, "no"); // not really supported by the instruction 2830 %} 2831 %} 2832 2833 2834 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit) 2835 2836 // ============================================================================ 2837 2838 instruct ShouldNotReachHere() %{ 2839 match(Halt); 2840 format %{ "stop\t# ShouldNotReachHere" %} 2841 ins_encode %{ 2842 if (is_reachable()) { 2843 __ stop(_halt_reason); 2844 } 2845 %} 2846 ins_pipe(pipe_slow); 2847 %} 2848 2849 // ============================================================================ 2850 2851 instruct addF_reg(regF dst, regF src) %{ 2852 predicate((UseSSE>=1) && (UseAVX == 0)); 2853 match(Set dst (AddF dst src)); 2854 2855 format %{ "addss $dst, $src" %} 2856 ins_cost(150); 2857 ins_encode %{ 2858 __ addss($dst$$XMMRegister, $src$$XMMRegister); 2859 %} 2860 ins_pipe(pipe_slow); 2861 %} 2862 2863 instruct addF_mem(regF dst, memory src) %{ 2864 predicate((UseSSE>=1) && (UseAVX == 0)); 2865 match(Set dst (AddF dst (LoadF src))); 2866 2867 format %{ "addss $dst, $src" %} 2868 ins_cost(150); 2869 ins_encode %{ 2870 __ addss($dst$$XMMRegister, $src$$Address); 2871 %} 2872 ins_pipe(pipe_slow); 2873 %} 2874 2875 instruct addF_imm(regF dst, immF con) %{ 2876 predicate((UseSSE>=1) && (UseAVX == 0)); 2877 match(Set dst (AddF dst con)); 2878 format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 2879 ins_cost(150); 2880 ins_encode %{ 2881 __ addss($dst$$XMMRegister, $constantaddress($con)); 2882 %} 2883 ins_pipe(pipe_slow); 2884 %} 2885 2886 instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ 2887 predicate(UseAVX > 0); 2888 match(Set dst (AddF src1 src2)); 2889 2890 format %{ "vaddss $dst, $src1, $src2" %} 2891 ins_cost(150); 2892 ins_encode %{ 2893 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2894 %} 2895 ins_pipe(pipe_slow); 2896 %} 2897 2898 instruct addF_reg_mem(regF dst, regF src1, memory src2) %{ 2899 predicate(UseAVX > 0); 2900 match(Set dst (AddF src1 (LoadF src2))); 2901 2902 format %{ "vaddss $dst, $src1, $src2" %} 2903 ins_cost(150); 2904 ins_encode %{ 2905 __ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2906 %} 2907 ins_pipe(pipe_slow); 2908 %} 2909 2910 instruct addF_reg_imm(regF dst, regF src, immF con) %{ 2911 predicate(UseAVX > 0); 2912 match(Set dst (AddF src con)); 2913 2914 format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 2915 ins_cost(150); 2916 ins_encode %{ 2917 __ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2918 %} 2919 ins_pipe(pipe_slow); 2920 %} 2921 2922 instruct addD_reg(regD dst, regD src) %{ 2923 predicate((UseSSE>=2) && (UseAVX == 0)); 2924 match(Set dst (AddD dst src)); 2925 2926 format %{ "addsd $dst, $src" %} 2927 ins_cost(150); 2928 ins_encode %{ 2929 __ addsd($dst$$XMMRegister, $src$$XMMRegister); 2930 %} 2931 ins_pipe(pipe_slow); 2932 %} 2933 2934 instruct addD_mem(regD dst, memory src) %{ 2935 predicate((UseSSE>=2) && (UseAVX == 0)); 2936 match(Set dst (AddD dst (LoadD src))); 2937 2938 format %{ "addsd $dst, $src" %} 2939 ins_cost(150); 2940 ins_encode %{ 2941 __ addsd($dst$$XMMRegister, $src$$Address); 2942 %} 2943 ins_pipe(pipe_slow); 2944 %} 2945 2946 instruct addD_imm(regD dst, immD con) %{ 2947 predicate((UseSSE>=2) && (UseAVX == 0)); 2948 match(Set dst (AddD dst con)); 2949 format %{ "addsd $dst, [$constantaddress]\t# load from constant 
table: double=$con" %} 2950 ins_cost(150); 2951 ins_encode %{ 2952 __ addsd($dst$$XMMRegister, $constantaddress($con)); 2953 %} 2954 ins_pipe(pipe_slow); 2955 %} 2956 2957 instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ 2958 predicate(UseAVX > 0); 2959 match(Set dst (AddD src1 src2)); 2960 2961 format %{ "vaddsd $dst, $src1, $src2" %} 2962 ins_cost(150); 2963 ins_encode %{ 2964 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 2965 %} 2966 ins_pipe(pipe_slow); 2967 %} 2968 2969 instruct addD_reg_mem(regD dst, regD src1, memory src2) %{ 2970 predicate(UseAVX > 0); 2971 match(Set dst (AddD src1 (LoadD src2))); 2972 2973 format %{ "vaddsd $dst, $src1, $src2" %} 2974 ins_cost(150); 2975 ins_encode %{ 2976 __ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 2977 %} 2978 ins_pipe(pipe_slow); 2979 %} 2980 2981 instruct addD_reg_imm(regD dst, regD src, immD con) %{ 2982 predicate(UseAVX > 0); 2983 match(Set dst (AddD src con)); 2984 2985 format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 2986 ins_cost(150); 2987 ins_encode %{ 2988 __ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 2989 %} 2990 ins_pipe(pipe_slow); 2991 %} 2992 2993 instruct subF_reg(regF dst, regF src) %{ 2994 predicate((UseSSE>=1) && (UseAVX == 0)); 2995 match(Set dst (SubF dst src)); 2996 2997 format %{ "subss $dst, $src" %} 2998 ins_cost(150); 2999 ins_encode %{ 3000 __ subss($dst$$XMMRegister, $src$$XMMRegister); 3001 %} 3002 ins_pipe(pipe_slow); 3003 %} 3004 3005 instruct subF_mem(regF dst, memory src) %{ 3006 predicate((UseSSE>=1) && (UseAVX == 0)); 3007 match(Set dst (SubF dst (LoadF src))); 3008 3009 format %{ "subss $dst, $src" %} 3010 ins_cost(150); 3011 ins_encode %{ 3012 __ subss($dst$$XMMRegister, $src$$Address); 3013 %} 3014 ins_pipe(pipe_slow); 3015 %} 3016 3017 instruct subF_imm(regF dst, immF con) %{ 3018 predicate((UseSSE>=1) && (UseAVX == 0)); 3019 match(Set dst (SubF dst con)); 3020 format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3021 ins_cost(150); 3022 ins_encode %{ 3023 __ subss($dst$$XMMRegister, $constantaddress($con)); 3024 %} 3025 ins_pipe(pipe_slow); 3026 %} 3027 3028 instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ 3029 predicate(UseAVX > 0); 3030 match(Set dst (SubF src1 src2)); 3031 3032 format %{ "vsubss $dst, $src1, $src2" %} 3033 ins_cost(150); 3034 ins_encode %{ 3035 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3036 %} 3037 ins_pipe(pipe_slow); 3038 %} 3039 3040 instruct subF_reg_mem(regF dst, regF src1, memory src2) %{ 3041 predicate(UseAVX > 0); 3042 match(Set dst (SubF src1 (LoadF src2))); 3043 3044 format %{ "vsubss $dst, $src1, $src2" %} 3045 ins_cost(150); 3046 ins_encode %{ 3047 __ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3048 %} 3049 ins_pipe(pipe_slow); 3050 %} 3051 3052 instruct subF_reg_imm(regF dst, regF src, immF con) %{ 3053 predicate(UseAVX > 0); 3054 match(Set dst (SubF src con)); 3055 3056 format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3057 ins_cost(150); 3058 ins_encode %{ 3059 __ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3060 %} 3061 ins_pipe(pipe_slow); 3062 %} 3063 3064 instruct subD_reg(regD dst, regD src) %{ 3065 predicate((UseSSE>=2) && (UseAVX == 0)); 3066 match(Set dst (SubD dst src)); 3067 3068 format %{ "subsd $dst, $src" %} 3069 ins_cost(150); 3070 ins_encode %{ 3071 __ subsd($dst$$XMMRegister, 
$src$$XMMRegister); 3072 %} 3073 ins_pipe(pipe_slow); 3074 %} 3075 3076 instruct subD_mem(regD dst, memory src) %{ 3077 predicate((UseSSE>=2) && (UseAVX == 0)); 3078 match(Set dst (SubD dst (LoadD src))); 3079 3080 format %{ "subsd $dst, $src" %} 3081 ins_cost(150); 3082 ins_encode %{ 3083 __ subsd($dst$$XMMRegister, $src$$Address); 3084 %} 3085 ins_pipe(pipe_slow); 3086 %} 3087 3088 instruct subD_imm(regD dst, immD con) %{ 3089 predicate((UseSSE>=2) && (UseAVX == 0)); 3090 match(Set dst (SubD dst con)); 3091 format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3092 ins_cost(150); 3093 ins_encode %{ 3094 __ subsd($dst$$XMMRegister, $constantaddress($con)); 3095 %} 3096 ins_pipe(pipe_slow); 3097 %} 3098 3099 instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ 3100 predicate(UseAVX > 0); 3101 match(Set dst (SubD src1 src2)); 3102 3103 format %{ "vsubsd $dst, $src1, $src2" %} 3104 ins_cost(150); 3105 ins_encode %{ 3106 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3107 %} 3108 ins_pipe(pipe_slow); 3109 %} 3110 3111 instruct subD_reg_mem(regD dst, regD src1, memory src2) %{ 3112 predicate(UseAVX > 0); 3113 match(Set dst (SubD src1 (LoadD src2))); 3114 3115 format %{ "vsubsd $dst, $src1, $src2" %} 3116 ins_cost(150); 3117 ins_encode %{ 3118 __ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3119 %} 3120 ins_pipe(pipe_slow); 3121 %} 3122 3123 instruct subD_reg_imm(regD dst, regD src, immD con) %{ 3124 predicate(UseAVX > 0); 3125 match(Set dst (SubD src con)); 3126 3127 format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3128 ins_cost(150); 3129 ins_encode %{ 3130 __ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3131 %} 3132 ins_pipe(pipe_slow); 3133 %} 3134 3135 instruct mulF_reg(regF dst, regF src) %{ 3136 predicate((UseSSE>=1) && (UseAVX == 0)); 3137 match(Set dst (MulF dst src)); 3138 3139 format %{ "mulss $dst, $src" %} 3140 ins_cost(150); 3141 ins_encode %{ 3142 __ mulss($dst$$XMMRegister, $src$$XMMRegister); 3143 %} 3144 ins_pipe(pipe_slow); 3145 %} 3146 3147 instruct mulF_mem(regF dst, memory src) %{ 3148 predicate((UseSSE>=1) && (UseAVX == 0)); 3149 match(Set dst (MulF dst (LoadF src))); 3150 3151 format %{ "mulss $dst, $src" %} 3152 ins_cost(150); 3153 ins_encode %{ 3154 __ mulss($dst$$XMMRegister, $src$$Address); 3155 %} 3156 ins_pipe(pipe_slow); 3157 %} 3158 3159 instruct mulF_imm(regF dst, immF con) %{ 3160 predicate((UseSSE>=1) && (UseAVX == 0)); 3161 match(Set dst (MulF dst con)); 3162 format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3163 ins_cost(150); 3164 ins_encode %{ 3165 __ mulss($dst$$XMMRegister, $constantaddress($con)); 3166 %} 3167 ins_pipe(pipe_slow); 3168 %} 3169 3170 instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ 3171 predicate(UseAVX > 0); 3172 match(Set dst (MulF src1 src2)); 3173 3174 format %{ "vmulss $dst, $src1, $src2" %} 3175 ins_cost(150); 3176 ins_encode %{ 3177 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3178 %} 3179 ins_pipe(pipe_slow); 3180 %} 3181 3182 instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{ 3183 predicate(UseAVX > 0); 3184 match(Set dst (MulF src1 (LoadF src2))); 3185 3186 format %{ "vmulss $dst, $src1, $src2" %} 3187 ins_cost(150); 3188 ins_encode %{ 3189 __ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3190 %} 3191 ins_pipe(pipe_slow); 3192 %} 3193 3194 instruct mulF_reg_imm(regF dst, regF src, immF con) 
%{ 3195 predicate(UseAVX > 0); 3196 match(Set dst (MulF src con)); 3197 3198 format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3199 ins_cost(150); 3200 ins_encode %{ 3201 __ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3202 %} 3203 ins_pipe(pipe_slow); 3204 %} 3205 3206 instruct mulD_reg(regD dst, regD src) %{ 3207 predicate((UseSSE>=2) && (UseAVX == 0)); 3208 match(Set dst (MulD dst src)); 3209 3210 format %{ "mulsd $dst, $src" %} 3211 ins_cost(150); 3212 ins_encode %{ 3213 __ mulsd($dst$$XMMRegister, $src$$XMMRegister); 3214 %} 3215 ins_pipe(pipe_slow); 3216 %} 3217 3218 instruct mulD_mem(regD dst, memory src) %{ 3219 predicate((UseSSE>=2) && (UseAVX == 0)); 3220 match(Set dst (MulD dst (LoadD src))); 3221 3222 format %{ "mulsd $dst, $src" %} 3223 ins_cost(150); 3224 ins_encode %{ 3225 __ mulsd($dst$$XMMRegister, $src$$Address); 3226 %} 3227 ins_pipe(pipe_slow); 3228 %} 3229 3230 instruct mulD_imm(regD dst, immD con) %{ 3231 predicate((UseSSE>=2) && (UseAVX == 0)); 3232 match(Set dst (MulD dst con)); 3233 format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3234 ins_cost(150); 3235 ins_encode %{ 3236 __ mulsd($dst$$XMMRegister, $constantaddress($con)); 3237 %} 3238 ins_pipe(pipe_slow); 3239 %} 3240 3241 instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ 3242 predicate(UseAVX > 0); 3243 match(Set dst (MulD src1 src2)); 3244 3245 format %{ "vmulsd $dst, $src1, $src2" %} 3246 ins_cost(150); 3247 ins_encode %{ 3248 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3249 %} 3250 ins_pipe(pipe_slow); 3251 %} 3252 3253 instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{ 3254 predicate(UseAVX > 0); 3255 match(Set dst (MulD src1 (LoadD src2))); 3256 3257 format %{ "vmulsd $dst, $src1, $src2" %} 3258 ins_cost(150); 3259 ins_encode %{ 3260 __ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3261 %} 3262 ins_pipe(pipe_slow); 3263 %} 3264 3265 instruct mulD_reg_imm(regD dst, regD src, immD con) %{ 3266 predicate(UseAVX > 0); 3267 match(Set dst (MulD src con)); 3268 3269 format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3270 ins_cost(150); 3271 ins_encode %{ 3272 __ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3273 %} 3274 ins_pipe(pipe_slow); 3275 %} 3276 3277 instruct divF_reg(regF dst, regF src) %{ 3278 predicate((UseSSE>=1) && (UseAVX == 0)); 3279 match(Set dst (DivF dst src)); 3280 3281 format %{ "divss $dst, $src" %} 3282 ins_cost(150); 3283 ins_encode %{ 3284 __ divss($dst$$XMMRegister, $src$$XMMRegister); 3285 %} 3286 ins_pipe(pipe_slow); 3287 %} 3288 3289 instruct divF_mem(regF dst, memory src) %{ 3290 predicate((UseSSE>=1) && (UseAVX == 0)); 3291 match(Set dst (DivF dst (LoadF src))); 3292 3293 format %{ "divss $dst, $src" %} 3294 ins_cost(150); 3295 ins_encode %{ 3296 __ divss($dst$$XMMRegister, $src$$Address); 3297 %} 3298 ins_pipe(pipe_slow); 3299 %} 3300 3301 instruct divF_imm(regF dst, immF con) %{ 3302 predicate((UseSSE>=1) && (UseAVX == 0)); 3303 match(Set dst (DivF dst con)); 3304 format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %} 3305 ins_cost(150); 3306 ins_encode %{ 3307 __ divss($dst$$XMMRegister, $constantaddress($con)); 3308 %} 3309 ins_pipe(pipe_slow); 3310 %} 3311 3312 instruct divF_reg_reg(regF dst, regF src1, regF src2) %{ 3313 predicate(UseAVX > 0); 3314 match(Set dst (DivF src1 src2)); 3315 3316 format %{ "vdivss $dst, 
$src1, $src2" %} 3317 ins_cost(150); 3318 ins_encode %{ 3319 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3320 %} 3321 ins_pipe(pipe_slow); 3322 %} 3323 3324 instruct divF_reg_mem(regF dst, regF src1, memory src2) %{ 3325 predicate(UseAVX > 0); 3326 match(Set dst (DivF src1 (LoadF src2))); 3327 3328 format %{ "vdivss $dst, $src1, $src2" %} 3329 ins_cost(150); 3330 ins_encode %{ 3331 __ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3332 %} 3333 ins_pipe(pipe_slow); 3334 %} 3335 3336 instruct divF_reg_imm(regF dst, regF src, immF con) %{ 3337 predicate(UseAVX > 0); 3338 match(Set dst (DivF src con)); 3339 3340 format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %} 3341 ins_cost(150); 3342 ins_encode %{ 3343 __ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3344 %} 3345 ins_pipe(pipe_slow); 3346 %} 3347 3348 instruct divD_reg(regD dst, regD src) %{ 3349 predicate((UseSSE>=2) && (UseAVX == 0)); 3350 match(Set dst (DivD dst src)); 3351 3352 format %{ "divsd $dst, $src" %} 3353 ins_cost(150); 3354 ins_encode %{ 3355 __ divsd($dst$$XMMRegister, $src$$XMMRegister); 3356 %} 3357 ins_pipe(pipe_slow); 3358 %} 3359 3360 instruct divD_mem(regD dst, memory src) %{ 3361 predicate((UseSSE>=2) && (UseAVX == 0)); 3362 match(Set dst (DivD dst (LoadD src))); 3363 3364 format %{ "divsd $dst, $src" %} 3365 ins_cost(150); 3366 ins_encode %{ 3367 __ divsd($dst$$XMMRegister, $src$$Address); 3368 %} 3369 ins_pipe(pipe_slow); 3370 %} 3371 3372 instruct divD_imm(regD dst, immD con) %{ 3373 predicate((UseSSE>=2) && (UseAVX == 0)); 3374 match(Set dst (DivD dst con)); 3375 format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %} 3376 ins_cost(150); 3377 ins_encode %{ 3378 __ divsd($dst$$XMMRegister, $constantaddress($con)); 3379 %} 3380 ins_pipe(pipe_slow); 3381 %} 3382 3383 instruct divD_reg_reg(regD dst, regD src1, regD src2) %{ 3384 predicate(UseAVX > 0); 3385 match(Set dst (DivD src1 src2)); 3386 3387 format %{ "vdivsd $dst, $src1, $src2" %} 3388 ins_cost(150); 3389 ins_encode %{ 3390 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister); 3391 %} 3392 ins_pipe(pipe_slow); 3393 %} 3394 3395 instruct divD_reg_mem(regD dst, regD src1, memory src2) %{ 3396 predicate(UseAVX > 0); 3397 match(Set dst (DivD src1 (LoadD src2))); 3398 3399 format %{ "vdivsd $dst, $src1, $src2" %} 3400 ins_cost(150); 3401 ins_encode %{ 3402 __ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address); 3403 %} 3404 ins_pipe(pipe_slow); 3405 %} 3406 3407 instruct divD_reg_imm(regD dst, regD src, immD con) %{ 3408 predicate(UseAVX > 0); 3409 match(Set dst (DivD src con)); 3410 3411 format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %} 3412 ins_cost(150); 3413 ins_encode %{ 3414 __ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con)); 3415 %} 3416 ins_pipe(pipe_slow); 3417 %} 3418 3419 instruct absF_reg(regF dst) %{ 3420 predicate((UseSSE>=1) && (UseAVX == 0)); 3421 match(Set dst (AbsF dst)); 3422 ins_cost(150); 3423 format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %} 3424 ins_encode %{ 3425 __ andps($dst$$XMMRegister, ExternalAddress(float_signmask())); 3426 %} 3427 ins_pipe(pipe_slow); 3428 %} 3429 3430 instruct absF_reg_reg(vlRegF dst, vlRegF src) %{ 3431 predicate(UseAVX > 0); 3432 match(Set dst (AbsF src)); 3433 ins_cost(150); 3434 format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %} 3435 ins_encode 
%{ 3436 int vlen_enc = Assembler::AVX_128bit; 3437 __ vandps($dst$$XMMRegister, $src$$XMMRegister, 3438 ExternalAddress(float_signmask()), vlen_enc); 3439 %} 3440 ins_pipe(pipe_slow); 3441 %} 3442 3443 instruct absD_reg(regD dst) %{ 3444 predicate((UseSSE>=2) && (UseAVX == 0)); 3445 match(Set dst (AbsD dst)); 3446 ins_cost(150); 3447 format %{ "andpd $dst, [0x7fffffffffffffff]\t" 3448 "# abs double by sign masking" %} 3449 ins_encode %{ 3450 __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask())); 3451 %} 3452 ins_pipe(pipe_slow); 3453 %} 3454 3455 instruct absD_reg_reg(vlRegD dst, vlRegD src) %{ 3456 predicate(UseAVX > 0); 3457 match(Set dst (AbsD src)); 3458 ins_cost(150); 3459 format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t" 3460 "# abs double by sign masking" %} 3461 ins_encode %{ 3462 int vlen_enc = Assembler::AVX_128bit; 3463 __ vandpd($dst$$XMMRegister, $src$$XMMRegister, 3464 ExternalAddress(double_signmask()), vlen_enc); 3465 %} 3466 ins_pipe(pipe_slow); 3467 %} 3468 3469 instruct negF_reg(regF dst) %{ 3470 predicate((UseSSE>=1) && (UseAVX == 0)); 3471 match(Set dst (NegF dst)); 3472 ins_cost(150); 3473 format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %} 3474 ins_encode %{ 3475 __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip())); 3476 %} 3477 ins_pipe(pipe_slow); 3478 %} 3479 3480 instruct negF_reg_reg(vlRegF dst, vlRegF src) %{ 3481 predicate(UseAVX > 0); 3482 match(Set dst (NegF src)); 3483 ins_cost(150); 3484 format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %} 3485 ins_encode %{ 3486 __ vnegatess($dst$$XMMRegister, $src$$XMMRegister, 3487 ExternalAddress(float_signflip())); 3488 %} 3489 ins_pipe(pipe_slow); 3490 %} 3491 3492 instruct negD_reg(regD dst) %{ 3493 predicate((UseSSE>=2) && (UseAVX == 0)); 3494 match(Set dst (NegD dst)); 3495 ins_cost(150); 3496 format %{ "xorpd $dst, [0x8000000000000000]\t" 3497 "# neg double by sign flipping" %} 3498 ins_encode %{ 3499 __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip())); 3500 %} 3501 ins_pipe(pipe_slow); 3502 %} 3503 3504 instruct negD_reg_reg(vlRegD dst, vlRegD src) %{ 3505 predicate(UseAVX > 0); 3506 match(Set dst (NegD src)); 3507 ins_cost(150); 3508 format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t" 3509 "# neg double by sign flipping" %} 3510 ins_encode %{ 3511 __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister, 3512 ExternalAddress(double_signflip())); 3513 %} 3514 ins_pipe(pipe_slow); 3515 %} 3516 3517 // sqrtss instruction needs destination register to be pre initialized for best performance 3518 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3519 instruct sqrtF_reg(regF dst) %{ 3520 predicate(UseSSE>=1); 3521 match(Set dst (SqrtF dst)); 3522 format %{ "sqrtss $dst, $dst" %} 3523 ins_encode %{ 3524 __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister); 3525 %} 3526 ins_pipe(pipe_slow); 3527 %} 3528 3529 // sqrtsd instruction needs destination register to be pre initialized for best performance 3530 // Therefore only the instruct rule where the input is pre-loaded into dst register is defined below 3531 instruct sqrtD_reg(regD dst) %{ 3532 predicate(UseSSE>=2); 3533 match(Set dst (SqrtD dst)); 3534 format %{ "sqrtsd $dst, $dst" %} 3535 ins_encode %{ 3536 __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister); 3537 %} 3538 ins_pipe(pipe_slow); 3539 %} 3540 3541 3542 // ---------------------------------------- VectorReinterpret ------------------------------------ 3543 instruct reinterpret_mask(kReg 
dst) %{ 3544 predicate(n->bottom_type()->isa_vectmask() && 3545 Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src 3546 match(Set dst (VectorReinterpret dst)); 3547 ins_cost(125); 3548 format %{ "vector_reinterpret $dst\t!" %} 3549 ins_encode %{ 3550 // empty 3551 %} 3552 ins_pipe( pipe_slow ); 3553 %} 3554 3555 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{ 3556 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3557 n->bottom_type()->isa_vectmask() && 3558 n->in(1)->bottom_type()->isa_vectmask() && 3559 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT && 3560 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3561 match(Set dst (VectorReinterpret src)); 3562 effect(TEMP xtmp); 3563 format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %} 3564 ins_encode %{ 3565 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT); 3566 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3567 assert(src_sz == dst_sz , "src and dst size mismatch"); 3568 int vlen_enc = vector_length_encoding(src_sz); 3569 __ evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3570 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3571 %} 3572 ins_pipe( pipe_slow ); 3573 %} 3574 3575 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{ 3576 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3577 n->bottom_type()->isa_vectmask() && 3578 n->in(1)->bottom_type()->isa_vectmask() && 3579 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT || 3580 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) && 3581 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3582 match(Set dst (VectorReinterpret src)); 3583 effect(TEMP xtmp); 3584 format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %} 3585 ins_encode %{ 3586 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT); 3587 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3588 assert(src_sz == dst_sz , "src and dst size mismatch"); 3589 int vlen_enc = vector_length_encoding(src_sz); 3590 __ evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3591 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3592 %} 3593 ins_pipe( pipe_slow ); 3594 %} 3595 3596 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{ 3597 predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) && 3598 n->bottom_type()->isa_vectmask() && 3599 n->in(1)->bottom_type()->isa_vectmask() && 3600 (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG || 3601 n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) && 3602 n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src 3603 match(Set dst (VectorReinterpret src)); 3604 effect(TEMP xtmp); 3605 format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" 
%} 3606 ins_encode %{ 3607 int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG); 3608 int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE); 3609 assert(src_sz == dst_sz , "src and dst size mismatch"); 3610 int vlen_enc = vector_length_encoding(src_sz); 3611 __ evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc); 3612 __ evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc); 3613 %} 3614 ins_pipe( pipe_slow ); 3615 %} 3616 3617 instruct reinterpret(vec dst) %{ 3618 predicate(!n->bottom_type()->isa_vectmask() && 3619 Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src 3620 match(Set dst (VectorReinterpret dst)); 3621 ins_cost(125); 3622 format %{ "vector_reinterpret $dst\t!" %} 3623 ins_encode %{ 3624 // empty 3625 %} 3626 ins_pipe( pipe_slow ); 3627 %} 3628 3629 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{ 3630 predicate(UseAVX == 0 && 3631 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3632 match(Set dst (VectorReinterpret src)); 3633 ins_cost(125); 3634 effect(TEMP dst, TEMP scratch); 3635 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3636 ins_encode %{ 3637 assert(Matcher::vector_length_in_bytes(this) <= 16, "required"); 3638 assert(Matcher::vector_length_in_bytes(this, $src) <= 8, "required"); 3639 3640 int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src); 3641 if (src_vlen_in_bytes == 4) { 3642 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register); 3643 } else { 3644 assert(src_vlen_in_bytes == 8, ""); 3645 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register); 3646 } 3647 __ pand($dst$$XMMRegister, $src$$XMMRegister); 3648 %} 3649 ins_pipe( pipe_slow ); 3650 %} 3651 3652 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{ 3653 predicate(UseAVX > 0 && 3654 !n->bottom_type()->isa_vectmask() && 3655 (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src 3656 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3657 match(Set dst (VectorReinterpret src)); 3658 ins_cost(125); 3659 effect(TEMP scratch); 3660 format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %} 3661 ins_encode %{ 3662 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register); 3663 %} 3664 ins_pipe( pipe_slow ); 3665 %} 3666 3667 3668 instruct vreinterpret_expand(legVec dst, vec src) %{ 3669 predicate(UseAVX > 0 && 3670 !n->bottom_type()->isa_vectmask() && 3671 (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src 3672 (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst 3673 match(Set dst (VectorReinterpret src)); 3674 ins_cost(125); 3675 format %{ "vector_reinterpret_expand $dst,$src\t!" 
%} 3676 ins_encode %{ 3677 switch (Matcher::vector_length_in_bytes(this, $src)) { 3678 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3679 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3680 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3681 default: ShouldNotReachHere(); 3682 } 3683 %} 3684 ins_pipe( pipe_slow ); 3685 %} 3686 3687 instruct reinterpret_shrink(vec dst, legVec src) %{ 3688 predicate(!n->bottom_type()->isa_vectmask() && 3689 Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst 3690 match(Set dst (VectorReinterpret src)); 3691 ins_cost(125); 3692 format %{ "vector_reinterpret_shrink $dst,$src\t!" %} 3693 ins_encode %{ 3694 switch (Matcher::vector_length_in_bytes(this)) { 3695 case 4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break; 3696 case 8: __ movq ($dst$$XMMRegister, $src$$XMMRegister); break; 3697 case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break; 3698 case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break; 3699 default: ShouldNotReachHere(); 3700 } 3701 %} 3702 ins_pipe( pipe_slow ); 3703 %} 3704 3705 // ---------------------------------------------------------------------------------------------------- 3706 3707 #ifdef _LP64 3708 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{ 3709 match(Set dst (RoundDoubleMode src rmode)); 3710 format %{ "roundsd $dst,$src" %} 3711 ins_cost(150); 3712 ins_encode %{ 3713 assert(UseSSE >= 4, "required"); 3714 __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant); 3715 %} 3716 ins_pipe(pipe_slow); 3717 %} 3718 3719 instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{ 3720 match(Set dst (RoundDoubleMode (LoadD src) rmode)); 3721 format %{ "roundsd $dst,$src" %} 3722 ins_cost(150); 3723 ins_encode %{ 3724 assert(UseSSE >= 4, "required"); 3725 __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant); 3726 %} 3727 ins_pipe(pipe_slow); 3728 %} 3729 3730 instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{ 3731 match(Set dst (RoundDoubleMode con rmode)); 3732 effect(TEMP scratch_reg); 3733 format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %} 3734 ins_cost(150); 3735 ins_encode %{ 3736 assert(UseSSE >= 4, "required"); 3737 __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register); 3738 %} 3739 ins_pipe(pipe_slow); 3740 %} 3741 3742 instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{ 3743 predicate(Matcher::vector_length(n) < 8); 3744 match(Set dst (RoundDoubleModeV src rmode)); 3745 format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %} 3746 ins_encode %{ 3747 assert(UseAVX > 0, "required"); 3748 int vlen_enc = vector_length_encoding(this); 3749 __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vlen_enc); 3750 %} 3751 ins_pipe( pipe_slow ); 3752 %} 3753 3754 instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{ 3755 predicate(Matcher::vector_length(n) == 8); 3756 match(Set dst (RoundDoubleModeV src rmode)); 3757 format %{ "vrndscalepd $dst,$src,$rmode\t! 
round packed8D" %} 3758 ins_encode %{ 3759 assert(UseAVX > 2, "required"); 3760 __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit); 3761 %} 3762 ins_pipe( pipe_slow ); 3763 %} 3764 3765 instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{ 3766 predicate(Matcher::vector_length(n) < 8); 3767 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3768 format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %} 3769 ins_encode %{ 3770 assert(UseAVX > 0, "required"); 3771 int vlen_enc = vector_length_encoding(this); 3772 __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vlen_enc); 3773 %} 3774 ins_pipe( pipe_slow ); 3775 %} 3776 3777 instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{ 3778 predicate(Matcher::vector_length(n) == 8); 3779 match(Set dst (RoundDoubleModeV (LoadVector mem) rmode)); 3780 format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %} 3781 ins_encode %{ 3782 assert(UseAVX > 2, "required"); 3783 __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit); 3784 %} 3785 ins_pipe( pipe_slow ); 3786 %} 3787 #endif // _LP64 3788 3789 instruct onspinwait() %{ 3790 match(OnSpinWait); 3791 ins_cost(200); 3792 3793 format %{ 3794 $$template 3795 $$emit$$"pause\t! membar_onspinwait" 3796 %} 3797 ins_encode %{ 3798 __ pause(); 3799 %} 3800 ins_pipe(pipe_slow); 3801 %} 3802 3803 // a * b + c 3804 instruct fmaD_reg(regD a, regD b, regD c) %{ 3805 predicate(UseFMA); 3806 match(Set c (FmaD c (Binary a b))); 3807 format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %} 3808 ins_cost(150); 3809 ins_encode %{ 3810 __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3811 %} 3812 ins_pipe( pipe_slow ); 3813 %} 3814 3815 // a * b + c 3816 instruct fmaF_reg(regF a, regF b, regF c) %{ 3817 predicate(UseFMA); 3818 match(Set c (FmaF c (Binary a b))); 3819 format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %} 3820 ins_cost(150); 3821 ins_encode %{ 3822 __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister); 3823 %} 3824 ins_pipe( pipe_slow ); 3825 %} 3826 3827 // ====================VECTOR INSTRUCTIONS===================================== 3828 3829 // Dummy reg-to-reg vector moves. Removed during post-selection cleanup. 3830 instruct MoveVec2Leg(legVec dst, vec src) %{ 3831 match(Set dst src); 3832 format %{ "" %} 3833 ins_encode %{ 3834 ShouldNotReachHere(); 3835 %} 3836 ins_pipe( fpu_reg_reg ); 3837 %} 3838 3839 instruct MoveLeg2Vec(vec dst, legVec src) %{ 3840 match(Set dst src); 3841 format %{ "" %} 3842 ins_encode %{ 3843 ShouldNotReachHere(); 3844 %} 3845 ins_pipe( fpu_reg_reg ); 3846 %} 3847 3848 // ============================================================================ 3849 3850 // Load vectors generic operand pattern 3851 instruct loadV(vec dst, memory mem) %{ 3852 match(Set dst (LoadVector mem)); 3853 ins_cost(125); 3854 format %{ "load_vector $dst,$mem" %} 3855 ins_encode %{ 3856 __ load_vector($dst$$XMMRegister, $mem$$Address, Matcher::vector_length_in_bytes(this)); 3857 %} 3858 ins_pipe( pipe_slow ); 3859 %} 3860 3861 // Store vectors generic operand pattern. 
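// The store encoding below simply dispatches on the vector width in bytes: the sub-XMM
// sizes (4 and 8 bytes) fall back to movdl/movq, a full 128-bit vector uses movdqu,
// a 256-bit vector uses vmovdqu (AVX), and a 512-bit vector uses the EVEX form
// evmovdqul. For example, storing a 256-bit vector of ints (vector_length_in_bytes
// == 32) is expected to come out as a single "vmovdqu [mem], ymm" instruction.
// (Illustrative summary only; the authoritative mapping is the switch in the
// encoding block that follows.)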
3862 instruct storeV(memory mem, vec src) %{ 3863 match(Set mem (StoreVector mem src)); 3864 ins_cost(145); 3865 format %{ "store_vector $mem,$src\n\t" %} 3866 ins_encode %{ 3867 switch (Matcher::vector_length_in_bytes(this, $src)) { 3868 case 4: __ movdl ($mem$$Address, $src$$XMMRegister); break; 3869 case 8: __ movq ($mem$$Address, $src$$XMMRegister); break; 3870 case 16: __ movdqu ($mem$$Address, $src$$XMMRegister); break; 3871 case 32: __ vmovdqu ($mem$$Address, $src$$XMMRegister); break; 3872 case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break; 3873 default: ShouldNotReachHere(); 3874 } 3875 %} 3876 ins_pipe( pipe_slow ); 3877 %} 3878 3879 // ---------------------------------------- Gather ------------------------------------ 3880 3881 // Gather INT, LONG, FLOAT, DOUBLE 3882 3883 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{ 3884 predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32); 3885 match(Set dst (LoadVectorGather mem idx)); 3886 effect(TEMP dst, TEMP tmp, TEMP mask); 3887 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %} 3888 ins_encode %{ 3889 assert(UseAVX >= 2, "sanity"); 3890 3891 int vlen_enc = vector_length_encoding(this); 3892 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3893 3894 assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity"); 3895 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3896 3897 if (vlen_enc == Assembler::AVX_128bit) { 3898 __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3899 } else { 3900 __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set())); 3901 } 3902 __ lea($tmp$$Register, $mem$$Address); 3903 __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc); 3904 %} 3905 ins_pipe( pipe_slow ); 3906 %} 3907 3908 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{ 3909 predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64); 3910 match(Set dst (LoadVectorGather mem idx)); 3911 effect(TEMP dst, TEMP tmp, TEMP ktmp); 3912 format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and ktmp as TEMP" %} 3913 ins_encode %{ 3914 assert(UseAVX > 2, "sanity"); 3915 3916 int vlen_enc = vector_length_encoding(this); 3917 BasicType elem_bt = Matcher::vector_element_basic_type(this); 3918 3919 assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE 3920 3921 __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register); 3922 __ lea($tmp$$Register, $mem$$Address); 3923 __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc); 3924 %} 3925 ins_pipe( pipe_slow ); 3926 %} 3927 3928 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{ 3929 match(Set dst (LoadVectorGatherMasked mem (Binary idx mask))); 3930 effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp); 3931 format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! 
using $tmp and $ktmp as TEMP" %}
  ins_encode %{
    assert(UseAVX > 2, "sanity");
    int vlen_enc = vector_length_encoding(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the gather instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary register first.
    __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ lea($tmp$$Register, $mem$$Address);
    __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
// ====================Scatter=======================================

// Scatter INT, LONG, FLOAT, DOUBLE

instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
  predicate(UseAVX > 2);
  match(Set mem (StoreVectorScatter mem (Binary src idx)));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);

    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE

    __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
    __ lea($tmp$$Register, $mem$$Address);
    __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
  match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
  effect(TEMP tmp, TEMP ktmp);
  format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
    assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
    // Note: Since the scatter instruction partially updates the opmask register used
    // for predication, the mask operand is moved to a temporary register first.
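    // (Background, as a hedged note: the AVX-512 gather/scatter forms clear each opmask
    // bit once the corresponding element has been processed, so after the instruction the
    // mask register reads as all zeroes. Copying the incoming $mask into the $ktmp
    // temporary lets the instruction consume and destroy $ktmp while the original mask
    // value stays intact for any later uses.)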
3980 __ kmovwl($ktmp$$KRegister, $mask$$KRegister); 3981 __ lea($tmp$$Register, $mem$$Address); 3982 __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc); 3983 %} 3984 ins_pipe( pipe_slow ); 3985 %} 3986 3987 // ====================REPLICATE======================================= 3988 3989 // Replicate byte scalar to be vector 3990 instruct ReplB_reg(vec dst, rRegI src) %{ 3991 match(Set dst (ReplicateB src)); 3992 format %{ "replicateB $dst,$src" %} 3993 ins_encode %{ 3994 uint vlen = Matcher::vector_length(this); 3995 if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 3996 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW 3997 int vlen_enc = vector_length_encoding(this); 3998 __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc); 3999 } else if (VM_Version::supports_avx2()) { 4000 int vlen_enc = vector_length_encoding(this); 4001 __ movdl($dst$$XMMRegister, $src$$Register); 4002 __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4003 } else { 4004 __ movdl($dst$$XMMRegister, $src$$Register); 4005 __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister); 4006 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4007 if (vlen >= 16) { 4008 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4009 if (vlen >= 32) { 4010 assert(vlen == 32, "sanity"); 4011 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4012 } 4013 } 4014 } 4015 %} 4016 ins_pipe( pipe_slow ); 4017 %} 4018 4019 instruct ReplB_mem(vec dst, memory mem) %{ 4020 predicate(VM_Version::supports_avx2()); 4021 match(Set dst (ReplicateB (LoadB mem))); 4022 format %{ "replicateB $dst,$mem" %} 4023 ins_encode %{ 4024 int vlen_enc = vector_length_encoding(this); 4025 __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vlen_enc); 4026 %} 4027 ins_pipe( pipe_slow ); 4028 %} 4029 4030 instruct ReplB_imm(vec dst, immI con) %{ 4031 match(Set dst (ReplicateB con)); 4032 format %{ "replicateB $dst,$con" %} 4033 ins_encode %{ 4034 InternalAddress addr = $constantaddress(T_BYTE, vreplicate_imm(T_BYTE, $con$$constant, Matcher::vector_length(this))); 4035 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4036 %} 4037 ins_pipe( pipe_slow ); 4038 %} 4039 4040 // ====================ReplicateS======================================= 4041 4042 instruct ReplS_reg(vec dst, rRegI src) %{ 4043 match(Set dst (ReplicateS src)); 4044 format %{ "replicateS $dst,$src" %} 4045 ins_encode %{ 4046 uint vlen = Matcher::vector_length(this); 4047 if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands 4048 assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW 4049 int vlen_enc = vector_length_encoding(this); 4050 __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc); 4051 } else if (VM_Version::supports_avx2()) { 4052 int vlen_enc = vector_length_encoding(this); 4053 __ movdl($dst$$XMMRegister, $src$$Register); 4054 __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4055 } else { 4056 __ movdl($dst$$XMMRegister, $src$$Register); 4057 __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4058 if (vlen >= 8) { 4059 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4060 if (vlen >= 16) { 4061 assert(vlen == 16, "sanity"); 4062 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4063 } 4064 } 4065 } 4066 %} 4067 ins_pipe( pipe_slow ); 4068 %} 4069 4070 instruct ReplS_mem(vec dst, 
memory mem) %{ 4071 predicate(VM_Version::supports_avx2()); 4072 match(Set dst (ReplicateS (LoadS mem))); 4073 format %{ "replicateS $dst,$mem" %} 4074 ins_encode %{ 4075 int vlen_enc = vector_length_encoding(this); 4076 __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc); 4077 %} 4078 ins_pipe( pipe_slow ); 4079 %} 4080 4081 instruct ReplS_imm(vec dst, immI con) %{ 4082 match(Set dst (ReplicateS con)); 4083 format %{ "replicateS $dst,$con" %} 4084 ins_encode %{ 4085 InternalAddress addr = $constantaddress(T_SHORT, vreplicate_imm(T_SHORT, $con$$constant, Matcher::vector_length(this))); 4086 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4087 %} 4088 ins_pipe( pipe_slow ); 4089 %} 4090 4091 // ====================ReplicateI======================================= 4092 4093 instruct ReplI_reg(vec dst, rRegI src) %{ 4094 match(Set dst (ReplicateI src)); 4095 format %{ "replicateI $dst,$src" %} 4096 ins_encode %{ 4097 uint vlen = Matcher::vector_length(this); 4098 if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4099 int vlen_enc = vector_length_encoding(this); 4100 __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc); 4101 } else if (VM_Version::supports_avx2()) { 4102 int vlen_enc = vector_length_encoding(this); 4103 __ movdl($dst$$XMMRegister, $src$$Register); 4104 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4105 } else { 4106 __ movdl($dst$$XMMRegister, $src$$Register); 4107 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4108 if (vlen >= 8) { 4109 assert(vlen == 8, "sanity"); 4110 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4111 } 4112 } 4113 %} 4114 ins_pipe( pipe_slow ); 4115 %} 4116 4117 instruct ReplI_mem(vec dst, memory mem) %{ 4118 match(Set dst (ReplicateI (LoadI mem))); 4119 format %{ "replicateI $dst,$mem" %} 4120 ins_encode %{ 4121 uint vlen = Matcher::vector_length(this); 4122 if (vlen <= 4) { 4123 __ movdl($dst$$XMMRegister, $mem$$Address); 4124 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4125 } else { 4126 assert(VM_Version::supports_avx2(), "sanity"); 4127 int vlen_enc = vector_length_encoding(this); 4128 __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4129 } 4130 %} 4131 ins_pipe( pipe_slow ); 4132 %} 4133 4134 instruct ReplI_imm(vec dst, immI con) %{ 4135 match(Set dst (ReplicateI con)); 4136 format %{ "replicateI $dst,$con" %} 4137 ins_encode %{ 4138 InternalAddress addr = $constantaddress(T_INT, vreplicate_imm(T_INT, $con$$constant, Matcher::vector_length(this))); 4139 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4140 %} 4141 ins_pipe( pipe_slow ); 4142 %} 4143 4144 // Replicate scalar zero to be vector 4145 instruct ReplI_zero(vec dst, immI_0 zero) %{ 4146 match(Set dst (ReplicateB zero)); 4147 match(Set dst (ReplicateS zero)); 4148 match(Set dst (ReplicateI zero)); 4149 format %{ "replicateI $dst,$zero" %} 4150 ins_encode %{ 4151 uint vsize = Matcher::vector_length_in_bytes(this); 4152 if (vsize <= 16) { 4153 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4154 } else { 4155 int vlen_enc = vector_length_encoding(this); 4156 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4157 } 4158 %} 4159 ins_pipe( fpu_reg_reg ); 4160 %} 4161 4162 instruct ReplI_M1(vec dst, immI_M1 con) %{ 4163 predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16); 4164 match(Set dst (ReplicateB con)); 4165 match(Set dst (ReplicateS con)); 4166 match(Set dst (ReplicateI con)); 
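  // All three ReplicateB/S/I forms of the constant -1 can share this one rule because an
  // all-ones bit pattern looks the same for every element size; vallones below just fills
  // the whole destination register with ones (conceptually like a compare-equal of a
  // register with itself, which yields all ones regardless of its prior contents).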
4167 effect(TEMP dst); 4168 format %{ "vallones $dst" %} 4169 ins_encode %{ 4170 int vector_len = vector_length_encoding(this); 4171 __ vallones($dst$$XMMRegister, vector_len); 4172 %} 4173 ins_pipe( pipe_slow ); 4174 %} 4175 4176 // ====================ReplicateL======================================= 4177 4178 #ifdef _LP64 4179 // Replicate long (8 byte) scalar to be vector 4180 instruct ReplL_reg(vec dst, rRegL src) %{ 4181 match(Set dst (ReplicateL src)); 4182 format %{ "replicateL $dst,$src" %} 4183 ins_encode %{ 4184 uint vlen = Matcher::vector_length(this); 4185 if (vlen == 2) { 4186 __ movdq($dst$$XMMRegister, $src$$Register); 4187 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4188 } else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4189 int vlen_enc = vector_length_encoding(this); 4190 __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc); 4191 } else if (VM_Version::supports_avx2()) { 4192 assert(vlen == 4, "sanity"); 4193 int vlen_enc = vector_length_encoding(this); 4194 __ movdq($dst$$XMMRegister, $src$$Register); 4195 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4196 } else { 4197 assert(vlen == 4, "sanity"); 4198 __ movdq($dst$$XMMRegister, $src$$Register); 4199 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4200 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4201 } 4202 %} 4203 ins_pipe( pipe_slow ); 4204 %} 4205 #else // _LP64 4206 // Replicate long (8 byte) scalar to be vector 4207 instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{ 4208 predicate(Matcher::vector_length(n) <= 4); 4209 match(Set dst (ReplicateL src)); 4210 effect(TEMP dst, USE src, TEMP tmp); 4211 format %{ "replicateL $dst,$src" %} 4212 ins_encode %{ 4213 uint vlen = Matcher::vector_length(this); 4214 if (vlen == 2) { 4215 __ movdl($dst$$XMMRegister, $src$$Register); 4216 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4217 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4218 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4219 } else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands 4220 int vlen_enc = Assembler::AVX_256bit; 4221 __ movdl($dst$$XMMRegister, $src$$Register); 4222 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4223 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4224 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4225 } else { 4226 __ movdl($dst$$XMMRegister, $src$$Register); 4227 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4228 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4229 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4230 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4231 } 4232 %} 4233 ins_pipe( pipe_slow ); 4234 %} 4235 4236 instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{ 4237 predicate(Matcher::vector_length(n) == 8); 4238 match(Set dst (ReplicateL src)); 4239 effect(TEMP dst, USE src, TEMP tmp); 4240 format %{ "replicateL $dst,$src" %} 4241 ins_encode %{ 4242 if (VM_Version::supports_avx512vl()) { 4243 __ movdl($dst$$XMMRegister, $src$$Register); 4244 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4245 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4246 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4247 __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister); 4248 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1); 4249 } else { 4250 int vlen_enc = Assembler::AVX_512bit; 4251 __ movdl($dst$$XMMRegister, 
$src$$Register); 4252 __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register)); 4253 __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister); 4254 __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4255 } 4256 %} 4257 ins_pipe( pipe_slow ); 4258 %} 4259 #endif // _LP64 4260 4261 instruct ReplL_mem(vec dst, memory mem) %{ 4262 match(Set dst (ReplicateL (LoadL mem))); 4263 format %{ "replicateL $dst,$mem" %} 4264 ins_encode %{ 4265 uint vlen = Matcher::vector_length(this); 4266 if (vlen == 2) { 4267 __ movq($dst$$XMMRegister, $mem$$Address); 4268 __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister); 4269 } else { 4270 assert(VM_Version::supports_avx2(), "sanity"); 4271 int vlen_enc = vector_length_encoding(this); 4272 __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc); 4273 } 4274 %} 4275 ins_pipe( pipe_slow ); 4276 %} 4277 4278 // Replicate long (8 byte) scalar immediate to be vector by loading from const table. 4279 instruct ReplL_imm(vec dst, immL con) %{ 4280 match(Set dst (ReplicateL con)); 4281 format %{ "replicateL $dst,$con" %} 4282 ins_encode %{ 4283 InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, $con$$constant, Matcher::vector_length(this))); 4284 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4285 %} 4286 ins_pipe( pipe_slow ); 4287 %} 4288 4289 instruct ReplL_zero(vec dst, immL0 zero) %{ 4290 match(Set dst (ReplicateL zero)); 4291 format %{ "replicateL $dst,$zero" %} 4292 ins_encode %{ 4293 int vlen = Matcher::vector_length(this); 4294 if (vlen == 2) { 4295 __ pxor($dst$$XMMRegister, $dst$$XMMRegister); 4296 } else { 4297 int vlen_enc = vector_length_encoding(this); 4298 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 4299 } 4300 %} 4301 ins_pipe( fpu_reg_reg ); 4302 %} 4303 4304 instruct ReplL_M1(vec dst, immL_M1 con) %{ 4305 predicate(UseAVX > 0); 4306 match(Set dst (ReplicateL con)); 4307 effect(TEMP dst); 4308 format %{ "vallones $dst" %} 4309 ins_encode %{ 4310 int vector_len = vector_length_encoding(this); 4311 __ vallones($dst$$XMMRegister, vector_len); 4312 %} 4313 ins_pipe( pipe_slow ); 4314 %} 4315 4316 // ====================ReplicateF======================================= 4317 4318 instruct ReplF_reg(vec dst, vlRegF src) %{ 4319 match(Set dst (ReplicateF src)); 4320 format %{ "replicateF $dst,$src" %} 4321 ins_encode %{ 4322 uint vlen = Matcher::vector_length(this); 4323 if (vlen <= 4) { 4324 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4325 } else if (VM_Version::supports_avx2()) { 4326 int vlen_enc = vector_length_encoding(this); 4327 __ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4328 } else { 4329 assert(vlen == 8, "sanity"); 4330 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00); 4331 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4332 } 4333 %} 4334 ins_pipe( pipe_slow ); 4335 %} 4336 4337 instruct ReplF_mem(vec dst, memory mem) %{ 4338 match(Set dst (ReplicateF (LoadF mem))); 4339 format %{ "replicateF $dst,$mem" %} 4340 ins_encode %{ 4341 uint vlen = Matcher::vector_length(this); 4342 if (vlen <= 4) { 4343 __ movdl($dst$$XMMRegister, $mem$$Address); 4344 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00); 4345 } else { 4346 assert(VM_Version::supports_avx(), "sanity"); 4347 int vlen_enc = vector_length_encoding(this); 4348 __ vbroadcastss($dst$$XMMRegister, $mem$$Address, vlen_enc); 4349 } 4350 %} 4351 ins_pipe( pipe_slow ); 4352 %} 4353 4354 // Replicate float scalar 
immediate to be vector by loading from const table. 4355 instruct ReplF_imm(vec dst, immF con) %{ 4356 match(Set dst (ReplicateF con)); 4357 format %{ "replicateF $dst,$con" %} 4358 ins_encode %{ 4359 InternalAddress addr = $constantaddress(T_FLOAT, vreplicate_imm(T_FLOAT, $con$$constant, Matcher::vector_length(this))); 4360 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4361 %} 4362 ins_pipe( pipe_slow ); 4363 %} 4364 4365 instruct ReplF_zero(vec dst, immF0 zero) %{ 4366 match(Set dst (ReplicateF zero)); 4367 format %{ "replicateF $dst,$zero" %} 4368 ins_encode %{ 4369 uint vlen = Matcher::vector_length(this); 4370 if (vlen <= 4) { 4371 __ xorps($dst$$XMMRegister, $dst$$XMMRegister); 4372 } else { 4373 int vlen_enc = vector_length_encoding(this); 4374 __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4375 } 4376 %} 4377 ins_pipe( fpu_reg_reg ); 4378 %} 4379 4380 // ====================ReplicateD======================================= 4381 4382 // Replicate double (8 bytes) scalar to be vector 4383 instruct ReplD_reg(vec dst, vlRegD src) %{ 4384 match(Set dst (ReplicateD src)); 4385 format %{ "replicateD $dst,$src" %} 4386 ins_encode %{ 4387 uint vlen = Matcher::vector_length(this); 4388 if (vlen == 2) { 4389 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4390 } else if (VM_Version::supports_avx2()) { 4391 int vlen_enc = vector_length_encoding(this); 4392 __ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); // reg-to-reg variant requires AVX2 4393 } else { 4394 assert(vlen == 4, "sanity"); 4395 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44); 4396 __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister); 4397 } 4398 %} 4399 ins_pipe( pipe_slow ); 4400 %} 4401 4402 instruct ReplD_mem(vec dst, memory mem) %{ 4403 match(Set dst (ReplicateD (LoadD mem))); 4404 format %{ "replicateD $dst,$mem" %} 4405 ins_encode %{ 4406 uint vlen = Matcher::vector_length(this); 4407 if (vlen == 2) { 4408 __ movq($dst$$XMMRegister, $mem$$Address); 4409 __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44); 4410 } else { 4411 assert(VM_Version::supports_avx(), "sanity"); 4412 int vlen_enc = vector_length_encoding(this); 4413 __ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vlen_enc); 4414 } 4415 %} 4416 ins_pipe( pipe_slow ); 4417 %} 4418 4419 // Replicate double (8 byte) scalar immediate to be vector by loading from const table. 
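// For example, replicating the double constant 2.5 into a four-lane (256-bit) vector is
// expected to work by materializing the 32-byte pattern {2.5, 2.5, 2.5, 2.5} in the
// constant table (via vreplicate_imm) and then issuing one full-width load_vector from
// $constantaddress, rather than broadcasting at run time. (Illustrative sketch; the
// value 2.5 is just an example and the exact emission is the encoding block below.)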
4420 instruct ReplD_imm(vec dst, immD con) %{ 4421 match(Set dst (ReplicateD con)); 4422 format %{ "replicateD $dst,$con" %} 4423 ins_encode %{ 4424 InternalAddress addr = $constantaddress(T_DOUBLE, vreplicate_imm(T_DOUBLE, $con$$constant, Matcher::vector_length(this))); 4425 __ load_vector($dst$$XMMRegister, addr, Matcher::vector_length_in_bytes(this)); 4426 %} 4427 ins_pipe( pipe_slow ); 4428 %} 4429 4430 instruct ReplD_zero(vec dst, immD0 zero) %{ 4431 match(Set dst (ReplicateD zero)); 4432 format %{ "replicateD $dst,$zero" %} 4433 ins_encode %{ 4434 uint vlen = Matcher::vector_length(this); 4435 if (vlen == 2) { 4436 __ xorpd($dst$$XMMRegister, $dst$$XMMRegister); 4437 } else { 4438 int vlen_enc = vector_length_encoding(this); 4439 __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ 4440 } 4441 %} 4442 ins_pipe( fpu_reg_reg ); 4443 %} 4444 4445 // ====================VECTOR INSERT======================================= 4446 4447 instruct insert(vec dst, rRegI val, immU8 idx) %{ 4448 predicate(Matcher::vector_length_in_bytes(n) < 32); 4449 match(Set dst (VectorInsert (Binary dst val) idx)); 4450 format %{ "vector_insert $dst,$val,$idx" %} 4451 ins_encode %{ 4452 assert(UseSSE >= 4, "required"); 4453 assert(Matcher::vector_length_in_bytes(this) >= 8, "required"); 4454 4455 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4456 4457 assert(is_integral_type(elem_bt), ""); 4458 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4459 4460 __ insert(elem_bt, $dst$$XMMRegister, $val$$Register, $idx$$constant); 4461 %} 4462 ins_pipe( pipe_slow ); 4463 %} 4464 4465 instruct insert32(vec dst, vec src, rRegI val, immU8 idx, vec vtmp) %{ 4466 predicate(Matcher::vector_length_in_bytes(n) == 32); 4467 match(Set dst (VectorInsert (Binary src val) idx)); 4468 effect(TEMP vtmp); 4469 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4470 ins_encode %{ 4471 int vlen_enc = Assembler::AVX_256bit; 4472 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4473 int elem_per_lane = 16/type2aelembytes(elem_bt); 4474 int log2epr = log2(elem_per_lane); 4475 4476 assert(is_integral_type(elem_bt), "sanity"); 4477 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4478 4479 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4480 uint y_idx = ($idx$$constant >> log2epr) & 1; 4481 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4482 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4483 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4484 %} 4485 ins_pipe( pipe_slow ); 4486 %} 4487 4488 instruct insert64(vec dst, vec src, rRegI val, immU8 idx, legVec vtmp) %{ 4489 predicate(Matcher::vector_length_in_bytes(n) == 64); 4490 match(Set dst (VectorInsert (Binary src val) idx)); 4491 effect(TEMP vtmp); 4492 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4493 ins_encode %{ 4494 assert(UseAVX > 2, "sanity"); 4495 4496 BasicType elem_bt = Matcher::vector_element_basic_type(this); 4497 int elem_per_lane = 16/type2aelembytes(elem_bt); 4498 int log2epr = log2(elem_per_lane); 4499 4500 assert(is_integral_type(elem_bt), ""); 4501 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4502 4503 uint x_idx = $idx$$constant & right_n_bits(log2epr); 4504 uint y_idx = ($idx$$constant >> log2epr) & 3; 4505 __ vextracti32x4($vtmp$$XMMRegister, 
$src$$XMMRegister, y_idx); 4506 __ vinsert(elem_bt, $vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4507 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4508 %} 4509 ins_pipe( pipe_slow ); 4510 %} 4511 4512 #ifdef _LP64 4513 instruct insert2L(vec dst, rRegL val, immU8 idx) %{ 4514 predicate(Matcher::vector_length(n) == 2); 4515 match(Set dst (VectorInsert (Binary dst val) idx)); 4516 format %{ "vector_insert $dst,$val,$idx" %} 4517 ins_encode %{ 4518 assert(UseSSE >= 4, "required"); 4519 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4520 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4521 4522 __ pinsrq($dst$$XMMRegister, $val$$Register, $idx$$constant); 4523 %} 4524 ins_pipe( pipe_slow ); 4525 %} 4526 4527 instruct insert4L(vec dst, vec src, rRegL val, immU8 idx, vec vtmp) %{ 4528 predicate(Matcher::vector_length(n) == 4); 4529 match(Set dst (VectorInsert (Binary src val) idx)); 4530 effect(TEMP vtmp); 4531 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4532 ins_encode %{ 4533 assert(Matcher::vector_element_basic_type(this) == T_LONG, ""); 4534 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4535 4536 uint x_idx = $idx$$constant & right_n_bits(1); 4537 uint y_idx = ($idx$$constant >> 1) & 1; 4538 int vlen_enc = Assembler::AVX_256bit; 4539 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4540 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4541 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4542 %} 4543 ins_pipe( pipe_slow ); 4544 %} 4545 4546 instruct insert8L(vec dst, vec src, rRegL val, immU8 idx, legVec vtmp) %{ 4547 predicate(Matcher::vector_length(n) == 8); 4548 match(Set dst (VectorInsert (Binary src val) idx)); 4549 effect(TEMP vtmp); 4550 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4551 ins_encode %{ 4552 assert(Matcher::vector_element_basic_type(this) == T_LONG, "sanity"); 4553 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4554 4555 uint x_idx = $idx$$constant & right_n_bits(1); 4556 uint y_idx = ($idx$$constant >> 1) & 3; 4557 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4558 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$Register, x_idx); 4559 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4560 %} 4561 ins_pipe( pipe_slow ); 4562 %} 4563 #endif 4564 4565 instruct insertF(vec dst, regF val, immU8 idx) %{ 4566 predicate(Matcher::vector_length(n) < 8); 4567 match(Set dst (VectorInsert (Binary dst val) idx)); 4568 format %{ "vector_insert $dst,$val,$idx" %} 4569 ins_encode %{ 4570 assert(UseSSE >= 4, "sanity"); 4571 4572 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4573 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4574 4575 uint x_idx = $idx$$constant & right_n_bits(2); 4576 __ insertps($dst$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4577 %} 4578 ins_pipe( pipe_slow ); 4579 %} 4580 4581 instruct vinsertF(vec dst, vec src, regF val, immU8 idx, vec vtmp) %{ 4582 predicate(Matcher::vector_length(n) >= 8); 4583 match(Set dst (VectorInsert (Binary src val) idx)); 4584 effect(TEMP vtmp); 4585 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4586 ins_encode %{ 4587 assert(Matcher::vector_element_basic_type(this) == T_FLOAT, "sanity"); 4588 assert($idx$$constant < 
(int)Matcher::vector_length(this), "out of bounds"); 4589 4590 int vlen = Matcher::vector_length(this); 4591 uint x_idx = $idx$$constant & right_n_bits(2); 4592 if (vlen == 8) { 4593 uint y_idx = ($idx$$constant >> 2) & 1; 4594 int vlen_enc = Assembler::AVX_256bit; 4595 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4596 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4597 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4598 } else { 4599 assert(vlen == 16, "sanity"); 4600 uint y_idx = ($idx$$constant >> 2) & 3; 4601 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4602 __ vinsertps($vtmp$$XMMRegister, $vtmp$$XMMRegister, $val$$XMMRegister, x_idx << 4); 4603 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4604 } 4605 %} 4606 ins_pipe( pipe_slow ); 4607 %} 4608 4609 #ifdef _LP64 4610 instruct insert2D(vec dst, regD val, immU8 idx, rRegL tmp) %{ 4611 predicate(Matcher::vector_length(n) == 2); 4612 match(Set dst (VectorInsert (Binary dst val) idx)); 4613 effect(TEMP tmp); 4614 format %{ "vector_insert $dst,$val,$idx\t!using $tmp as TEMP" %} 4615 ins_encode %{ 4616 assert(UseSSE >= 4, "sanity"); 4617 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4618 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4619 4620 __ movq($tmp$$Register, $val$$XMMRegister); 4621 __ pinsrq($dst$$XMMRegister, $tmp$$Register, $idx$$constant); 4622 %} 4623 ins_pipe( pipe_slow ); 4624 %} 4625 4626 instruct insert4D(vec dst, vec src, regD val, immU8 idx, rRegL tmp, vec vtmp) %{ 4627 predicate(Matcher::vector_length(n) == 4); 4628 match(Set dst (VectorInsert (Binary src val) idx)); 4629 effect(TEMP vtmp, TEMP tmp); 4630 format %{ "vector_insert $dst,$src,$val,$idx\t!using $tmp, $vtmp as TEMP" %} 4631 ins_encode %{ 4632 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4633 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4634 4635 uint x_idx = $idx$$constant & right_n_bits(1); 4636 uint y_idx = ($idx$$constant >> 1) & 1; 4637 int vlen_enc = Assembler::AVX_256bit; 4638 __ movq($tmp$$Register, $val$$XMMRegister); 4639 __ vextracti128($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4640 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4641 __ vinserti128($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4642 %} 4643 ins_pipe( pipe_slow ); 4644 %} 4645 4646 instruct insert8D(vec dst, vec src, regD val, immI idx, rRegL tmp, legVec vtmp) %{ 4647 predicate(Matcher::vector_length(n) == 8); 4648 match(Set dst (VectorInsert (Binary src val) idx)); 4649 effect(TEMP tmp, TEMP vtmp); 4650 format %{ "vector_insert $dst,$src,$val,$idx\t!using $vtmp as TEMP" %} 4651 ins_encode %{ 4652 assert(Matcher::vector_element_basic_type(this) == T_DOUBLE, "sanity"); 4653 assert($idx$$constant < (int)Matcher::vector_length(this), "out of bounds"); 4654 4655 uint x_idx = $idx$$constant & right_n_bits(1); 4656 uint y_idx = ($idx$$constant >> 1) & 3; 4657 __ movq($tmp$$Register, $val$$XMMRegister); 4658 __ vextracti32x4($vtmp$$XMMRegister, $src$$XMMRegister, y_idx); 4659 __ vpinsrq($vtmp$$XMMRegister, $vtmp$$XMMRegister, $tmp$$Register, x_idx); 4660 __ vinserti32x4($dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister, y_idx); 4661 %} 4662 ins_pipe( pipe_slow ); 4663 %} 4664 #endif 4665 4666 // ====================REDUCTION ARITHMETIC======================================= 
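// Reduction semantics, as a sketch (pseudo-C, not the generated code): each instruct
// below folds every lane of the vector input into a single scalar and combines it with
// the scalar input. For the integral two-operand forms, with AddReductionVI as the
// example (the other nodes substitute *, &, |, ^, min or max for +):
//
//   int result = src1;                  // scalar input
//   for (uint i = 0; i < vlen; i++) {
//     result += src2[i];                // fold in each vector lane
//   }
//   dst = result;
//
// The actual lane folding is emitted by the C2 macro-assembler helpers invoked from the
// encodings (reduceI, reduceL, reduceB, reduceS, mulreduceB, reduce_fp, reduceFloatMinMax,
// reduceDoubleMinMax). Note that the FP add/mul reductions match "(AddReductionVF dst src)"
// and "(AddReductionVD dst src)", so their scalar input arrives in $dst and the result is
// produced there as well.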
4667 4668 // =======================Int Reduction========================================== 4669 4670 instruct reductionI(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4671 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_INT); // src2 4672 match(Set dst (AddReductionVI src1 src2)); 4673 match(Set dst (MulReductionVI src1 src2)); 4674 match(Set dst (AndReductionV src1 src2)); 4675 match(Set dst ( OrReductionV src1 src2)); 4676 match(Set dst (XorReductionV src1 src2)); 4677 match(Set dst (MinReductionV src1 src2)); 4678 match(Set dst (MaxReductionV src1 src2)); 4679 effect(TEMP vtmp1, TEMP vtmp2); 4680 format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4681 ins_encode %{ 4682 int opcode = this->ideal_Opcode(); 4683 int vlen = Matcher::vector_length(this, $src2); 4684 __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4685 %} 4686 ins_pipe( pipe_slow ); 4687 %} 4688 4689 // =======================Long Reduction========================================== 4690 4691 #ifdef _LP64 4692 instruct reductionL(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4693 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && !VM_Version::supports_avx512dq()); 4694 match(Set dst (AddReductionVL src1 src2)); 4695 match(Set dst (MulReductionVL src1 src2)); 4696 match(Set dst (AndReductionV src1 src2)); 4697 match(Set dst ( OrReductionV src1 src2)); 4698 match(Set dst (XorReductionV src1 src2)); 4699 match(Set dst (MinReductionV src1 src2)); 4700 match(Set dst (MaxReductionV src1 src2)); 4701 effect(TEMP vtmp1, TEMP vtmp2); 4702 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4703 ins_encode %{ 4704 int opcode = this->ideal_Opcode(); 4705 int vlen = Matcher::vector_length(this, $src2); 4706 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4707 %} 4708 ins_pipe( pipe_slow ); 4709 %} 4710 4711 instruct reductionL_avx512dq(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{ 4712 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_LONG && VM_Version::supports_avx512dq()); 4713 match(Set dst (AddReductionVL src1 src2)); 4714 match(Set dst (MulReductionVL src1 src2)); 4715 match(Set dst (AndReductionV src1 src2)); 4716 match(Set dst ( OrReductionV src1 src2)); 4717 match(Set dst (XorReductionV src1 src2)); 4718 match(Set dst (MinReductionV src1 src2)); 4719 match(Set dst (MaxReductionV src1 src2)); 4720 effect(TEMP vtmp1, TEMP vtmp2); 4721 format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4722 ins_encode %{ 4723 int opcode = this->ideal_Opcode(); 4724 int vlen = Matcher::vector_length(this, $src2); 4725 __ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4726 %} 4727 ins_pipe( pipe_slow ); 4728 %} 4729 #endif // _LP64 4730 4731 // =======================Float Reduction========================================== 4732 4733 instruct reductionF128(regF dst, vec src, vec vtmp) %{ 4734 predicate(Matcher::vector_length(n->in(2)) <= 4); // src 4735 match(Set dst (AddReductionVF dst src)); 4736 match(Set dst (MulReductionVF dst src)); 4737 effect(TEMP dst, TEMP vtmp); 4738 format %{ "vector_reduction_float $dst,$src ; using $vtmp as TEMP" %} 4739 ins_encode %{ 4740 int opcode = this->ideal_Opcode(); 4741 int vlen = Matcher::vector_length(this, 
$src); 4742 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4743 %} 4744 ins_pipe( pipe_slow ); 4745 %} 4746 4747 instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{ 4748 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4749 match(Set dst (AddReductionVF dst src)); 4750 match(Set dst (MulReductionVF dst src)); 4751 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4752 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4753 ins_encode %{ 4754 int opcode = this->ideal_Opcode(); 4755 int vlen = Matcher::vector_length(this, $src); 4756 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4757 %} 4758 ins_pipe( pipe_slow ); 4759 %} 4760 4761 instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4762 predicate(Matcher::vector_length(n->in(2)) == 16); // src 4763 match(Set dst (AddReductionVF dst src)); 4764 match(Set dst (MulReductionVF dst src)); 4765 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4766 format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4767 ins_encode %{ 4768 int opcode = this->ideal_Opcode(); 4769 int vlen = Matcher::vector_length(this, $src); 4770 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4771 %} 4772 ins_pipe( pipe_slow ); 4773 %} 4774 4775 // =======================Double Reduction========================================== 4776 4777 instruct reduction2D(regD dst, vec src, vec vtmp) %{ 4778 predicate(Matcher::vector_length(n->in(2)) == 2); // src 4779 match(Set dst (AddReductionVD dst src)); 4780 match(Set dst (MulReductionVD dst src)); 4781 effect(TEMP dst, TEMP vtmp); 4782 format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %} 4783 ins_encode %{ 4784 int opcode = this->ideal_Opcode(); 4785 int vlen = Matcher::vector_length(this, $src); 4786 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister); 4787 %} 4788 ins_pipe( pipe_slow ); 4789 %} 4790 4791 instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{ 4792 predicate(Matcher::vector_length(n->in(2)) == 4); // src 4793 match(Set dst (AddReductionVD dst src)); 4794 match(Set dst (MulReductionVD dst src)); 4795 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4796 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4797 ins_encode %{ 4798 int opcode = this->ideal_Opcode(); 4799 int vlen = Matcher::vector_length(this, $src); 4800 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4801 %} 4802 ins_pipe( pipe_slow ); 4803 %} 4804 4805 instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{ 4806 predicate(Matcher::vector_length(n->in(2)) == 8); // src 4807 match(Set dst (AddReductionVD dst src)); 4808 match(Set dst (MulReductionVD dst src)); 4809 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4810 format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %} 4811 ins_encode %{ 4812 int opcode = this->ideal_Opcode(); 4813 int vlen = Matcher::vector_length(this, $src); 4814 __ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4815 %} 4816 ins_pipe( pipe_slow ); 4817 %} 4818 4819 // =======================Byte Reduction========================================== 4820 4821 #ifdef _LP64 4822 instruct reductionB(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 
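  // Byte reduction without AVX512BW: semantically dst = src1 OP src2[0] OP ... OP src2[vlen-1],
  // with OP in {+, &, |, ^, min, max} (byte multiply reduction is matched separately below).
  // Operands and temps are legVec, i.e. confined to XMM0-XMM15 (presumably because, without
  // AVX512BW, the byte vector instructions emitted by reduceB() can only be VEX-encoded and
  // so cannot address XMM16-XMM31). The reductionB_avx512bw variant that follows has no such
  // restriction.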
4823 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && !VM_Version::supports_avx512bw()); 4824 match(Set dst (AddReductionVI src1 src2)); 4825 match(Set dst (AndReductionV src1 src2)); 4826 match(Set dst ( OrReductionV src1 src2)); 4827 match(Set dst (XorReductionV src1 src2)); 4828 match(Set dst (MinReductionV src1 src2)); 4829 match(Set dst (MaxReductionV src1 src2)); 4830 effect(TEMP vtmp1, TEMP vtmp2); 4831 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4832 ins_encode %{ 4833 int opcode = this->ideal_Opcode(); 4834 int vlen = Matcher::vector_length(this, $src2); 4835 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4836 %} 4837 ins_pipe( pipe_slow ); 4838 %} 4839 4840 instruct reductionB_avx512bw(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4841 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && VM_Version::supports_avx512bw()); 4842 match(Set dst (AddReductionVI src1 src2)); 4843 match(Set dst (AndReductionV src1 src2)); 4844 match(Set dst ( OrReductionV src1 src2)); 4845 match(Set dst (XorReductionV src1 src2)); 4846 match(Set dst (MinReductionV src1 src2)); 4847 match(Set dst (MaxReductionV src1 src2)); 4848 effect(TEMP vtmp1, TEMP vtmp2); 4849 format %{ "vector_reduction_byte $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4850 ins_encode %{ 4851 int opcode = this->ideal_Opcode(); 4852 int vlen = Matcher::vector_length(this, $src2); 4853 __ reduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4854 %} 4855 ins_pipe( pipe_slow ); 4856 %} 4857 #endif 4858 4859 // =======================Short Reduction========================================== 4860 4861 instruct reductionS(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4862 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_SHORT); // src2 4863 match(Set dst (AddReductionVI src1 src2)); 4864 match(Set dst (MulReductionVI src1 src2)); 4865 match(Set dst (AndReductionV src1 src2)); 4866 match(Set dst ( OrReductionV src1 src2)); 4867 match(Set dst (XorReductionV src1 src2)); 4868 match(Set dst (MinReductionV src1 src2)); 4869 match(Set dst (MaxReductionV src1 src2)); 4870 effect(TEMP vtmp1, TEMP vtmp2); 4871 format %{ "vector_reduction_short $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %} 4872 ins_encode %{ 4873 int opcode = this->ideal_Opcode(); 4874 int vlen = Matcher::vector_length(this, $src2); 4875 __ reduceS(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4876 %} 4877 ins_pipe( pipe_slow ); 4878 %} 4879 4880 // =======================Mul Reduction========================================== 4881 4882 instruct mul_reductionB(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{ 4883 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4884 Matcher::vector_length(n->in(2)) <= 32); // src2 4885 match(Set dst (MulReductionVI src1 src2)); 4886 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4887 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4888 ins_encode %{ 4889 int opcode = this->ideal_Opcode(); 4890 int vlen = Matcher::vector_length(this, $src2); 4891 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4892 %} 4893 ins_pipe( pipe_slow ); 4894 %} 4895 4896 instruct mul_reduction64B(rRegI dst, 
rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{ 4897 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_BYTE && 4898 Matcher::vector_length(n->in(2)) == 64); // src2 4899 match(Set dst (MulReductionVI src1 src2)); 4900 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2); 4901 format %{ "vector_mul_reduction_byte $dst,$src1,$src2; using $vtmp1, $vtmp2 as TEMP" %} 4902 ins_encode %{ 4903 int opcode = this->ideal_Opcode(); 4904 int vlen = Matcher::vector_length(this, $src2); 4905 __ mulreduceB(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 4906 %} 4907 ins_pipe( pipe_slow ); 4908 %} 4909 4910 //--------------------Min/Max Float Reduction -------------------- 4911 // Float Min Reduction 4912 instruct minmax_reduction2F(legRegF dst, immF src1, legVec src2, legVec tmp, 4913 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4914 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4915 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4916 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4917 Matcher::vector_length(n->in(2)) == 2); 4918 match(Set dst (MinReductionV src1 src2)); 4919 match(Set dst (MaxReductionV src1 src2)); 4920 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4921 format %{ "vector_minmax2F_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4922 ins_encode %{ 4923 assert(UseAVX > 0, "sanity"); 4924 4925 int opcode = this->ideal_Opcode(); 4926 int vlen = Matcher::vector_length(this, $src2); 4927 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4928 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4929 %} 4930 ins_pipe( pipe_slow ); 4931 %} 4932 4933 instruct minmax_reductionF(legRegF dst, immF src1, legVec src2, legVec tmp, legVec atmp, 4934 legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4935 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4936 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeF::POS_INF) || 4937 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeF::NEG_INF)) && 4938 Matcher::vector_length(n->in(2)) >= 4); 4939 match(Set dst (MinReductionV src1 src2)); 4940 match(Set dst (MaxReductionV src1 src2)); 4941 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4942 format %{ "vector_minmaxF_reduction $dst,$src1,$src2 ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4943 ins_encode %{ 4944 assert(UseAVX > 0, "sanity"); 4945 4946 int opcode = this->ideal_Opcode(); 4947 int vlen = Matcher::vector_length(this, $src2); 4948 __ reduceFloatMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, $tmp$$XMMRegister, 4949 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4950 %} 4951 ins_pipe( pipe_slow ); 4952 %} 4953 4954 instruct minmax_reduction2F_av(legRegF dst, legVec src, legVec tmp, 4955 legVec atmp, legVec btmp, legVec xmm_1, rFlagsReg cr) %{ 4956 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4957 Matcher::vector_length(n->in(2)) == 2); 4958 match(Set dst (MinReductionV dst src)); 4959 match(Set dst (MaxReductionV dst src)); 4960 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_1, KILL cr); 4961 format %{ "vector_minmax2F_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_1 as TEMP" %} 4962 ins_encode %{ 4963 assert(UseAVX > 0, 
"sanity"); 4964 4965 int opcode = this->ideal_Opcode(); 4966 int vlen = Matcher::vector_length(this, $src); 4967 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4968 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_1$$XMMRegister); 4969 %} 4970 ins_pipe( pipe_slow ); 4971 %} 4972 4973 4974 instruct minmax_reductionF_av(legRegF dst, legVec src, legVec tmp, 4975 legVec atmp, legVec btmp, legVec xmm_0, legVec xmm_1, rFlagsReg cr) %{ 4976 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_FLOAT && 4977 Matcher::vector_length(n->in(2)) >= 4); 4978 match(Set dst (MinReductionV dst src)); 4979 match(Set dst (MaxReductionV dst src)); 4980 effect(TEMP dst, TEMP tmp, TEMP atmp, TEMP btmp, TEMP xmm_0, TEMP xmm_1, KILL cr); 4981 format %{ "vector_minmaxF_reduction $dst,$src ; using $tmp, $atmp, $btmp, $xmm_0, $xmm_1 as TEMP" %} 4982 ins_encode %{ 4983 assert(UseAVX > 0, "sanity"); 4984 4985 int opcode = this->ideal_Opcode(); 4986 int vlen = Matcher::vector_length(this, $src); 4987 __ reduceFloatMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister, 4988 $atmp$$XMMRegister, $btmp$$XMMRegister, $xmm_0$$XMMRegister, $xmm_1$$XMMRegister); 4989 %} 4990 ins_pipe( pipe_slow ); 4991 %} 4992 4993 4994 //--------------------Min Double Reduction -------------------- 4995 instruct minmax_reduction2D(legRegD dst, immD src1, legVec src2, 4996 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 4997 rFlagsReg cr) %{ 4998 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 4999 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5000 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5001 Matcher::vector_length(n->in(2)) == 2); 5002 match(Set dst (MinReductionV src1 src2)); 5003 match(Set dst (MaxReductionV src1 src2)); 5004 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5005 format %{ "vector_minmax2D_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5006 ins_encode %{ 5007 assert(UseAVX > 0, "sanity"); 5008 5009 int opcode = this->ideal_Opcode(); 5010 int vlen = Matcher::vector_length(this, $src2); 5011 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5012 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5013 %} 5014 ins_pipe( pipe_slow ); 5015 %} 5016 5017 instruct minmax_reductionD(legRegD dst, immD src1, legVec src2, 5018 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5019 rFlagsReg cr) %{ 5020 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5021 ((n->Opcode() == Op_MinReductionV && n->in(1)->bottom_type() == TypeD::POS_INF) || 5022 (n->Opcode() == Op_MaxReductionV && n->in(1)->bottom_type() == TypeD::NEG_INF)) && 5023 Matcher::vector_length(n->in(2)) >= 4); 5024 match(Set dst (MinReductionV src1 src2)); 5025 match(Set dst (MaxReductionV src1 src2)); 5026 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5027 format %{ "vector_minmaxD_reduction $dst,$src1,$src2 ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5028 ins_encode %{ 5029 assert(UseAVX > 0, "sanity"); 5030 5031 int opcode = this->ideal_Opcode(); 5032 int vlen = Matcher::vector_length(this, $src2); 5033 __ reduceDoubleMinMax(opcode, vlen, false, $dst$$XMMRegister, $src2$$XMMRegister, 5034 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, 
$tmp5$$XMMRegister); 5035 %} 5036 ins_pipe( pipe_slow ); 5037 %} 5038 5039 5040 instruct minmax_reduction2D_av(legRegD dst, legVec src, 5041 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, // TEMPs 5042 rFlagsReg cr) %{ 5043 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5044 Matcher::vector_length(n->in(2)) == 2); 5045 match(Set dst (MinReductionV dst src)); 5046 match(Set dst (MaxReductionV dst src)); 5047 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); 5048 format %{ "vector_minmax2D_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4 as TEMP" %} 5049 ins_encode %{ 5050 assert(UseAVX > 0, "sanity"); 5051 5052 int opcode = this->ideal_Opcode(); 5053 int vlen = Matcher::vector_length(this, $src); 5054 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5055 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister); 5056 %} 5057 ins_pipe( pipe_slow ); 5058 %} 5059 5060 instruct minmax_reductionD_av(legRegD dst, legVec src, 5061 legVec tmp1, legVec tmp2, legVec tmp3, legVec tmp4, legVec tmp5, // TEMPs 5062 rFlagsReg cr) %{ 5063 predicate(Matcher::vector_element_basic_type(n->in(2)) == T_DOUBLE && 5064 Matcher::vector_length(n->in(2)) >= 4); 5065 match(Set dst (MinReductionV dst src)); 5066 match(Set dst (MaxReductionV dst src)); 5067 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr); 5068 format %{ "vector_minmaxD_reduction $dst,$src ; using $tmp1, $tmp2, $tmp3, $tmp4, $tmp5 as TEMP" %} 5069 ins_encode %{ 5070 assert(UseAVX > 0, "sanity"); 5071 5072 int opcode = this->ideal_Opcode(); 5073 int vlen = Matcher::vector_length(this, $src); 5074 __ reduceDoubleMinMax(opcode, vlen, true, $dst$$XMMRegister, $src$$XMMRegister, 5075 $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister, $tmp4$$XMMRegister, $tmp5$$XMMRegister); 5076 %} 5077 ins_pipe( pipe_slow ); 5078 %} 5079 5080 // ====================VECTOR ARITHMETIC======================================= 5081 5082 // --------------------------------- ADD -------------------------------------- 5083 5084 // Bytes vector add 5085 instruct vaddB(vec dst, vec src) %{ 5086 predicate(UseAVX == 0); 5087 match(Set dst (AddVB dst src)); 5088 format %{ "paddb $dst,$src\t! add packedB" %} 5089 ins_encode %{ 5090 __ paddb($dst$$XMMRegister, $src$$XMMRegister); 5091 %} 5092 ins_pipe( pipe_slow ); 5093 %} 5094 5095 instruct vaddB_reg(vec dst, vec src1, vec src2) %{ 5096 predicate(UseAVX > 0); 5097 match(Set dst (AddVB src1 src2)); 5098 format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %} 5099 ins_encode %{ 5100 int vlen_enc = vector_length_encoding(this); 5101 __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5102 %} 5103 ins_pipe( pipe_slow ); 5104 %} 5105 5106 instruct vaddB_mem(vec dst, vec src, memory mem) %{ 5107 predicate((UseAVX > 0) && 5108 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5109 match(Set dst (AddVB src (LoadVector mem))); 5110 format %{ "vpaddb $dst,$src,$mem\t! add packedB" %} 5111 ins_encode %{ 5112 int vlen_enc = vector_length_encoding(this); 5113 __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5114 %} 5115 ins_pipe( pipe_slow ); 5116 %} 5117 5118 // Shorts/Chars vector add 5119 instruct vaddS(vec dst, vec src) %{ 5120 predicate(UseAVX == 0); 5121 match(Set dst (AddVS dst src)); 5122 format %{ "paddw $dst,$src\t! 
add packedS" %} 5123 ins_encode %{ 5124 __ paddw($dst$$XMMRegister, $src$$XMMRegister); 5125 %} 5126 ins_pipe( pipe_slow ); 5127 %} 5128 5129 instruct vaddS_reg(vec dst, vec src1, vec src2) %{ 5130 predicate(UseAVX > 0); 5131 match(Set dst (AddVS src1 src2)); 5132 format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %} 5133 ins_encode %{ 5134 int vlen_enc = vector_length_encoding(this); 5135 __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5136 %} 5137 ins_pipe( pipe_slow ); 5138 %} 5139 5140 instruct vaddS_mem(vec dst, vec src, memory mem) %{ 5141 predicate((UseAVX > 0) && 5142 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5143 match(Set dst (AddVS src (LoadVector mem))); 5144 format %{ "vpaddw $dst,$src,$mem\t! add packedS" %} 5145 ins_encode %{ 5146 int vlen_enc = vector_length_encoding(this); 5147 __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5148 %} 5149 ins_pipe( pipe_slow ); 5150 %} 5151 5152 // Integers vector add 5153 instruct vaddI(vec dst, vec src) %{ 5154 predicate(UseAVX == 0); 5155 match(Set dst (AddVI dst src)); 5156 format %{ "paddd $dst,$src\t! add packedI" %} 5157 ins_encode %{ 5158 __ paddd($dst$$XMMRegister, $src$$XMMRegister); 5159 %} 5160 ins_pipe( pipe_slow ); 5161 %} 5162 5163 instruct vaddI_reg(vec dst, vec src1, vec src2) %{ 5164 predicate(UseAVX > 0); 5165 match(Set dst (AddVI src1 src2)); 5166 format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %} 5167 ins_encode %{ 5168 int vlen_enc = vector_length_encoding(this); 5169 __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5170 %} 5171 ins_pipe( pipe_slow ); 5172 %} 5173 5174 5175 instruct vaddI_mem(vec dst, vec src, memory mem) %{ 5176 predicate((UseAVX > 0) && 5177 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5178 match(Set dst (AddVI src (LoadVector mem))); 5179 format %{ "vpaddd $dst,$src,$mem\t! add packedI" %} 5180 ins_encode %{ 5181 int vlen_enc = vector_length_encoding(this); 5182 __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5183 %} 5184 ins_pipe( pipe_slow ); 5185 %} 5186 5187 // Longs vector add 5188 instruct vaddL(vec dst, vec src) %{ 5189 predicate(UseAVX == 0); 5190 match(Set dst (AddVL dst src)); 5191 format %{ "paddq $dst,$src\t! add packedL" %} 5192 ins_encode %{ 5193 __ paddq($dst$$XMMRegister, $src$$XMMRegister); 5194 %} 5195 ins_pipe( pipe_slow ); 5196 %} 5197 5198 instruct vaddL_reg(vec dst, vec src1, vec src2) %{ 5199 predicate(UseAVX > 0); 5200 match(Set dst (AddVL src1 src2)); 5201 format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %} 5202 ins_encode %{ 5203 int vlen_enc = vector_length_encoding(this); 5204 __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5205 %} 5206 ins_pipe( pipe_slow ); 5207 %} 5208 5209 instruct vaddL_mem(vec dst, vec src, memory mem) %{ 5210 predicate((UseAVX > 0) && 5211 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5212 match(Set dst (AddVL src (LoadVector mem))); 5213 format %{ "vpaddq $dst,$src,$mem\t! add packedL" %} 5214 ins_encode %{ 5215 int vlen_enc = vector_length_encoding(this); 5216 __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5217 %} 5218 ins_pipe( pipe_slow ); 5219 %} 5220 5221 // Floats vector add 5222 instruct vaddF(vec dst, vec src) %{ 5223 predicate(UseAVX == 0); 5224 match(Set dst (AddVF dst src)); 5225 format %{ "addps $dst,$src\t! 
add packedF" %} 5226 ins_encode %{ 5227 __ addps($dst$$XMMRegister, $src$$XMMRegister); 5228 %} 5229 ins_pipe( pipe_slow ); 5230 %} 5231 5232 instruct vaddF_reg(vec dst, vec src1, vec src2) %{ 5233 predicate(UseAVX > 0); 5234 match(Set dst (AddVF src1 src2)); 5235 format %{ "vaddps $dst,$src1,$src2\t! add packedF" %} 5236 ins_encode %{ 5237 int vlen_enc = vector_length_encoding(this); 5238 __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5239 %} 5240 ins_pipe( pipe_slow ); 5241 %} 5242 5243 instruct vaddF_mem(vec dst, vec src, memory mem) %{ 5244 predicate((UseAVX > 0) && 5245 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5246 match(Set dst (AddVF src (LoadVector mem))); 5247 format %{ "vaddps $dst,$src,$mem\t! add packedF" %} 5248 ins_encode %{ 5249 int vlen_enc = vector_length_encoding(this); 5250 __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5251 %} 5252 ins_pipe( pipe_slow ); 5253 %} 5254 5255 // Doubles vector add 5256 instruct vaddD(vec dst, vec src) %{ 5257 predicate(UseAVX == 0); 5258 match(Set dst (AddVD dst src)); 5259 format %{ "addpd $dst,$src\t! add packedD" %} 5260 ins_encode %{ 5261 __ addpd($dst$$XMMRegister, $src$$XMMRegister); 5262 %} 5263 ins_pipe( pipe_slow ); 5264 %} 5265 5266 instruct vaddD_reg(vec dst, vec src1, vec src2) %{ 5267 predicate(UseAVX > 0); 5268 match(Set dst (AddVD src1 src2)); 5269 format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %} 5270 ins_encode %{ 5271 int vlen_enc = vector_length_encoding(this); 5272 __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5273 %} 5274 ins_pipe( pipe_slow ); 5275 %} 5276 5277 instruct vaddD_mem(vec dst, vec src, memory mem) %{ 5278 predicate((UseAVX > 0) && 5279 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5280 match(Set dst (AddVD src (LoadVector mem))); 5281 format %{ "vaddpd $dst,$src,$mem\t! add packedD" %} 5282 ins_encode %{ 5283 int vlen_enc = vector_length_encoding(this); 5284 __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5285 %} 5286 ins_pipe( pipe_slow ); 5287 %} 5288 5289 // --------------------------------- SUB -------------------------------------- 5290 5291 // Bytes vector sub 5292 instruct vsubB(vec dst, vec src) %{ 5293 predicate(UseAVX == 0); 5294 match(Set dst (SubVB dst src)); 5295 format %{ "psubb $dst,$src\t! sub packedB" %} 5296 ins_encode %{ 5297 __ psubb($dst$$XMMRegister, $src$$XMMRegister); 5298 %} 5299 ins_pipe( pipe_slow ); 5300 %} 5301 5302 instruct vsubB_reg(vec dst, vec src1, vec src2) %{ 5303 predicate(UseAVX > 0); 5304 match(Set dst (SubVB src1 src2)); 5305 format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %} 5306 ins_encode %{ 5307 int vlen_enc = vector_length_encoding(this); 5308 __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5309 %} 5310 ins_pipe( pipe_slow ); 5311 %} 5312 5313 instruct vsubB_mem(vec dst, vec src, memory mem) %{ 5314 predicate((UseAVX > 0) && 5315 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5316 match(Set dst (SubVB src (LoadVector mem))); 5317 format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %} 5318 ins_encode %{ 5319 int vlen_enc = vector_length_encoding(this); 5320 __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5321 %} 5322 ins_pipe( pipe_slow ); 5323 %} 5324 5325 // Shorts/Chars vector sub 5326 instruct vsubS(vec dst, vec src) %{ 5327 predicate(UseAVX == 0); 5328 match(Set dst (SubVS dst src)); 5329 format %{ "psubw $dst,$src\t! 
sub packedS" %} 5330 ins_encode %{ 5331 __ psubw($dst$$XMMRegister, $src$$XMMRegister); 5332 %} 5333 ins_pipe( pipe_slow ); 5334 %} 5335 5336 5337 instruct vsubS_reg(vec dst, vec src1, vec src2) %{ 5338 predicate(UseAVX > 0); 5339 match(Set dst (SubVS src1 src2)); 5340 format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %} 5341 ins_encode %{ 5342 int vlen_enc = vector_length_encoding(this); 5343 __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5344 %} 5345 ins_pipe( pipe_slow ); 5346 %} 5347 5348 instruct vsubS_mem(vec dst, vec src, memory mem) %{ 5349 predicate((UseAVX > 0) && 5350 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5351 match(Set dst (SubVS src (LoadVector mem))); 5352 format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %} 5353 ins_encode %{ 5354 int vlen_enc = vector_length_encoding(this); 5355 __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5356 %} 5357 ins_pipe( pipe_slow ); 5358 %} 5359 5360 // Integers vector sub 5361 instruct vsubI(vec dst, vec src) %{ 5362 predicate(UseAVX == 0); 5363 match(Set dst (SubVI dst src)); 5364 format %{ "psubd $dst,$src\t! sub packedI" %} 5365 ins_encode %{ 5366 __ psubd($dst$$XMMRegister, $src$$XMMRegister); 5367 %} 5368 ins_pipe( pipe_slow ); 5369 %} 5370 5371 instruct vsubI_reg(vec dst, vec src1, vec src2) %{ 5372 predicate(UseAVX > 0); 5373 match(Set dst (SubVI src1 src2)); 5374 format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %} 5375 ins_encode %{ 5376 int vlen_enc = vector_length_encoding(this); 5377 __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5378 %} 5379 ins_pipe( pipe_slow ); 5380 %} 5381 5382 instruct vsubI_mem(vec dst, vec src, memory mem) %{ 5383 predicate((UseAVX > 0) && 5384 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5385 match(Set dst (SubVI src (LoadVector mem))); 5386 format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %} 5387 ins_encode %{ 5388 int vlen_enc = vector_length_encoding(this); 5389 __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5390 %} 5391 ins_pipe( pipe_slow ); 5392 %} 5393 5394 // Longs vector sub 5395 instruct vsubL(vec dst, vec src) %{ 5396 predicate(UseAVX == 0); 5397 match(Set dst (SubVL dst src)); 5398 format %{ "psubq $dst,$src\t! sub packedL" %} 5399 ins_encode %{ 5400 __ psubq($dst$$XMMRegister, $src$$XMMRegister); 5401 %} 5402 ins_pipe( pipe_slow ); 5403 %} 5404 5405 instruct vsubL_reg(vec dst, vec src1, vec src2) %{ 5406 predicate(UseAVX > 0); 5407 match(Set dst (SubVL src1 src2)); 5408 format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %} 5409 ins_encode %{ 5410 int vlen_enc = vector_length_encoding(this); 5411 __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5412 %} 5413 ins_pipe( pipe_slow ); 5414 %} 5415 5416 5417 instruct vsubL_mem(vec dst, vec src, memory mem) %{ 5418 predicate((UseAVX > 0) && 5419 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5420 match(Set dst (SubVL src (LoadVector mem))); 5421 format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %} 5422 ins_encode %{ 5423 int vlen_enc = vector_length_encoding(this); 5424 __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5425 %} 5426 ins_pipe( pipe_slow ); 5427 %} 5428 5429 // Floats vector sub 5430 instruct vsubF(vec dst, vec src) %{ 5431 predicate(UseAVX == 0); 5432 match(Set dst (SubVF dst src)); 5433 format %{ "subps $dst,$src\t! 
sub packedF" %} 5434 ins_encode %{ 5435 __ subps($dst$$XMMRegister, $src$$XMMRegister); 5436 %} 5437 ins_pipe( pipe_slow ); 5438 %} 5439 5440 instruct vsubF_reg(vec dst, vec src1, vec src2) %{ 5441 predicate(UseAVX > 0); 5442 match(Set dst (SubVF src1 src2)); 5443 format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %} 5444 ins_encode %{ 5445 int vlen_enc = vector_length_encoding(this); 5446 __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5447 %} 5448 ins_pipe( pipe_slow ); 5449 %} 5450 5451 instruct vsubF_mem(vec dst, vec src, memory mem) %{ 5452 predicate((UseAVX > 0) && 5453 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5454 match(Set dst (SubVF src (LoadVector mem))); 5455 format %{ "vsubps $dst,$src,$mem\t! sub packedF" %} 5456 ins_encode %{ 5457 int vlen_enc = vector_length_encoding(this); 5458 __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5459 %} 5460 ins_pipe( pipe_slow ); 5461 %} 5462 5463 // Doubles vector sub 5464 instruct vsubD(vec dst, vec src) %{ 5465 predicate(UseAVX == 0); 5466 match(Set dst (SubVD dst src)); 5467 format %{ "subpd $dst,$src\t! sub packedD" %} 5468 ins_encode %{ 5469 __ subpd($dst$$XMMRegister, $src$$XMMRegister); 5470 %} 5471 ins_pipe( pipe_slow ); 5472 %} 5473 5474 instruct vsubD_reg(vec dst, vec src1, vec src2) %{ 5475 predicate(UseAVX > 0); 5476 match(Set dst (SubVD src1 src2)); 5477 format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %} 5478 ins_encode %{ 5479 int vlen_enc = vector_length_encoding(this); 5480 __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5481 %} 5482 ins_pipe( pipe_slow ); 5483 %} 5484 5485 instruct vsubD_mem(vec dst, vec src, memory mem) %{ 5486 predicate((UseAVX > 0) && 5487 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5488 match(Set dst (SubVD src (LoadVector mem))); 5489 format %{ "vsubpd $dst,$src,$mem\t! 
sub packedD" %} 5490 ins_encode %{ 5491 int vlen_enc = vector_length_encoding(this); 5492 __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5493 %} 5494 ins_pipe( pipe_slow ); 5495 %} 5496 5497 // --------------------------------- MUL -------------------------------------- 5498 5499 // Byte vector mul 5500 instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5501 predicate(Matcher::vector_length(n) == 4 || 5502 Matcher::vector_length(n) == 8); 5503 match(Set dst (MulVB src1 src2)); 5504 effect(TEMP dst, TEMP tmp, TEMP scratch); 5505 format %{"vector_mulB $dst,$src1,$src2" %} 5506 ins_encode %{ 5507 assert(UseSSE > 3, "required"); 5508 __ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister); 5509 __ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister); 5510 __ pmullw($tmp$$XMMRegister, $dst$$XMMRegister); 5511 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5512 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 5513 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 5514 %} 5515 ins_pipe( pipe_slow ); 5516 %} 5517 5518 instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5519 predicate(Matcher::vector_length(n) == 16 && UseAVX <= 1); 5520 match(Set dst (MulVB src1 src2)); 5521 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5522 format %{"vector_mulB $dst,$src1,$src2" %} 5523 ins_encode %{ 5524 assert(UseSSE > 3, "required"); 5525 __ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister); 5526 __ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister); 5527 __ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister); 5528 __ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE); 5529 __ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE); 5530 __ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister); 5531 __ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister); 5532 __ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister); 5533 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5534 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 5535 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 5536 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 5537 %} 5538 ins_pipe( pipe_slow ); 5539 %} 5540 5541 instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{ 5542 predicate(Matcher::vector_length(n) == 16 && UseAVX > 1); 5543 match(Set dst (MulVB src1 src2)); 5544 effect(TEMP dst, TEMP tmp, TEMP scratch); 5545 format %{"vector_mulB $dst,$src1,$src2" %} 5546 ins_encode %{ 5547 int vlen_enc = Assembler::AVX_256bit; 5548 __ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5549 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5550 __ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5551 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5552 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5553 __ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister); 5554 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0); 5555 %} 5556 ins_pipe( pipe_slow ); 5557 %} 5558 5559 instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5560 predicate(Matcher::vector_length(n) == 32); 5561 match(Set dst (MulVB src1 src2)); 5562 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5563 format %{"vector_mulB $dst,$src1,$src2" %} 5564 ins_encode %{ 5565 assert(UseAVX > 1, "required"); 5566 int vlen_enc = 
Assembler::AVX_256bit; 5567 __ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5568 __ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister); 5569 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5570 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5571 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5572 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5573 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5574 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5575 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5576 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5577 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5578 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5579 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5580 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 5581 %} 5582 ins_pipe( pipe_slow ); 5583 %} 5584 5585 instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{ 5586 predicate(Matcher::vector_length(n) == 64); 5587 match(Set dst (MulVB src1 src2)); 5588 effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch); 5589 format %{"vector_mulB $dst,$src1,$src2\n\t" %} 5590 ins_encode %{ 5591 assert(UseAVX > 2, "required"); 5592 int vlen_enc = Assembler::AVX_512bit; 5593 __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister); 5594 __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister); 5595 __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5596 __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5597 __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5598 __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vlen_enc); 5599 __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5600 __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5601 __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 5602 __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5603 __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5604 __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5605 __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc); 5606 __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register); 5607 __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5608 %} 5609 ins_pipe( pipe_slow ); 5610 %} 5611 5612 // Shorts/Chars vector mul 5613 instruct vmulS(vec dst, vec src) %{ 5614 predicate(UseAVX == 0); 5615 match(Set dst (MulVS dst src)); 5616 format %{ "pmullw $dst,$src\t! mul packedS" %} 5617 ins_encode %{ 5618 __ pmullw($dst$$XMMRegister, $src$$XMMRegister); 5619 %} 5620 ins_pipe( pipe_slow ); 5621 %} 5622 5623 instruct vmulS_reg(vec dst, vec src1, vec src2) %{ 5624 predicate(UseAVX > 0); 5625 match(Set dst (MulVS src1 src2)); 5626 format %{ "vpmullw $dst,$src1,$src2\t! 
mul packedS" %} 5627 ins_encode %{ 5628 int vlen_enc = vector_length_encoding(this); 5629 __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5630 %} 5631 ins_pipe( pipe_slow ); 5632 %} 5633 5634 instruct vmulS_mem(vec dst, vec src, memory mem) %{ 5635 predicate((UseAVX > 0) && 5636 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5637 match(Set dst (MulVS src (LoadVector mem))); 5638 format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %} 5639 ins_encode %{ 5640 int vlen_enc = vector_length_encoding(this); 5641 __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5642 %} 5643 ins_pipe( pipe_slow ); 5644 %} 5645 5646 // Integers vector mul 5647 instruct vmulI(vec dst, vec src) %{ 5648 predicate(UseAVX == 0); 5649 match(Set dst (MulVI dst src)); 5650 format %{ "pmulld $dst,$src\t! mul packedI" %} 5651 ins_encode %{ 5652 assert(UseSSE > 3, "required"); 5653 __ pmulld($dst$$XMMRegister, $src$$XMMRegister); 5654 %} 5655 ins_pipe( pipe_slow ); 5656 %} 5657 5658 instruct vmulI_reg(vec dst, vec src1, vec src2) %{ 5659 predicate(UseAVX > 0); 5660 match(Set dst (MulVI src1 src2)); 5661 format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %} 5662 ins_encode %{ 5663 int vlen_enc = vector_length_encoding(this); 5664 __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5665 %} 5666 ins_pipe( pipe_slow ); 5667 %} 5668 5669 instruct vmulI_mem(vec dst, vec src, memory mem) %{ 5670 predicate((UseAVX > 0) && 5671 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5672 match(Set dst (MulVI src (LoadVector mem))); 5673 format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %} 5674 ins_encode %{ 5675 int vlen_enc = vector_length_encoding(this); 5676 __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5677 %} 5678 ins_pipe( pipe_slow ); 5679 %} 5680 5681 // Longs vector mul 5682 instruct vmulL_reg(vec dst, vec src1, vec src2) %{ 5683 predicate(VM_Version::supports_avx512dq()); 5684 match(Set dst (MulVL src1 src2)); 5685 format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %} 5686 ins_encode %{ 5687 assert(UseAVX > 2, "required"); 5688 int vlen_enc = vector_length_encoding(this); 5689 __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5690 %} 5691 ins_pipe( pipe_slow ); 5692 %} 5693 5694 instruct vmulL_mem(vec dst, vec src, memory mem) %{ 5695 predicate(VM_Version::supports_avx512dq() && 5696 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5697 match(Set dst (MulVL src (LoadVector mem))); 5698 format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %} 5699 ins_encode %{ 5700 assert(UseAVX > 2, "required"); 5701 int vlen_enc = vector_length_encoding(this); 5702 __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5703 %} 5704 ins_pipe( pipe_slow ); 5705 %} 5706 5707 instruct mul2L_reg(vec dst, vec src2, legVec tmp) %{ 5708 predicate(Matcher::vector_length(n) == 2 && !VM_Version::supports_avx512dq()); 5709 match(Set dst (MulVL dst src2)); 5710 effect(TEMP dst, TEMP tmp); 5711 format %{ "pshufd $tmp,$src2, 177\n\t" 5712 "pmulld $tmp,$dst\n\t" 5713 "phaddd $tmp,$tmp\n\t" 5714 "pmovzxdq $tmp,$tmp\n\t" 5715 "psllq $tmp, 32\n\t" 5716 "pmuludq $dst,$src2\n\t" 5717 "paddq $dst,$tmp\n\t! 
mul packed2L" %} 5718 5719 ins_encode %{ 5720 assert(VM_Version::supports_sse4_1(), "required"); 5721 int vlen_enc = Assembler::AVX_128bit; 5722 __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177); 5723 __ pmulld($tmp$$XMMRegister, $dst$$XMMRegister); 5724 __ phaddd($tmp$$XMMRegister, $tmp$$XMMRegister); 5725 __ pmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister); 5726 __ psllq($tmp$$XMMRegister, 32); 5727 __ pmuludq($dst$$XMMRegister, $src2$$XMMRegister); 5728 __ paddq($dst$$XMMRegister, $tmp$$XMMRegister); 5729 %} 5730 ins_pipe( pipe_slow ); 5731 %} 5732 5733 instruct vmul4L_reg_avx(vec dst, vec src1, vec src2, legVec tmp, legVec tmp1) %{ 5734 predicate(Matcher::vector_length(n) == 4 && !VM_Version::supports_avx512dq()); 5735 match(Set dst (MulVL src1 src2)); 5736 effect(TEMP tmp1, TEMP tmp); 5737 format %{ "vpshufd $tmp,$src2\n\t" 5738 "vpmulld $tmp,$src1,$tmp\n\t" 5739 "vphaddd $tmp,$tmp,$tmp\n\t" 5740 "vpmovzxdq $tmp,$tmp\n\t" 5741 "vpsllq $tmp,$tmp\n\t" 5742 "vpmuludq $tmp1,$src1,$src2\n\t" 5743 "vpaddq $dst,$tmp,$tmp1\t! mul packed4L" %} 5744 ins_encode %{ 5745 int vlen_enc = Assembler::AVX_256bit; 5746 __ vpshufd($tmp$$XMMRegister, $src2$$XMMRegister, 177, vlen_enc); 5747 __ vpmulld($tmp$$XMMRegister, $src1$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5748 __ vextracti128_high($tmp1$$XMMRegister, $tmp$$XMMRegister); 5749 __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5750 __ vpmovzxdq($tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 5751 __ vpsllq($tmp$$XMMRegister, $tmp$$XMMRegister, 32, vlen_enc); 5752 __ vpmuludq($tmp1$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5753 __ vpaddq($dst$$XMMRegister, $tmp$$XMMRegister, $tmp1$$XMMRegister, vlen_enc); 5754 %} 5755 ins_pipe( pipe_slow ); 5756 %} 5757 5758 // Floats vector mul 5759 instruct vmulF(vec dst, vec src) %{ 5760 predicate(UseAVX == 0); 5761 match(Set dst (MulVF dst src)); 5762 format %{ "mulps $dst,$src\t! mul packedF" %} 5763 ins_encode %{ 5764 __ mulps($dst$$XMMRegister, $src$$XMMRegister); 5765 %} 5766 ins_pipe( pipe_slow ); 5767 %} 5768 5769 instruct vmulF_reg(vec dst, vec src1, vec src2) %{ 5770 predicate(UseAVX > 0); 5771 match(Set dst (MulVF src1 src2)); 5772 format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %} 5773 ins_encode %{ 5774 int vlen_enc = vector_length_encoding(this); 5775 __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5776 %} 5777 ins_pipe( pipe_slow ); 5778 %} 5779 5780 instruct vmulF_mem(vec dst, vec src, memory mem) %{ 5781 predicate((UseAVX > 0) && 5782 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5783 match(Set dst (MulVF src (LoadVector mem))); 5784 format %{ "vmulps $dst,$src,$mem\t! mul packedF" %} 5785 ins_encode %{ 5786 int vlen_enc = vector_length_encoding(this); 5787 __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5788 %} 5789 ins_pipe( pipe_slow ); 5790 %} 5791 5792 // Doubles vector mul 5793 instruct vmulD(vec dst, vec src) %{ 5794 predicate(UseAVX == 0); 5795 match(Set dst (MulVD dst src)); 5796 format %{ "mulpd $dst,$src\t! mul packedD" %} 5797 ins_encode %{ 5798 __ mulpd($dst$$XMMRegister, $src$$XMMRegister); 5799 %} 5800 ins_pipe( pipe_slow ); 5801 %} 5802 5803 instruct vmulD_reg(vec dst, vec src1, vec src2) %{ 5804 predicate(UseAVX > 0); 5805 match(Set dst (MulVD src1 src2)); 5806 format %{ "vmulpd $dst,$src1,$src2\t! 
mul packedD" %} 5807 ins_encode %{ 5808 int vlen_enc = vector_length_encoding(this); 5809 __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5810 %} 5811 ins_pipe( pipe_slow ); 5812 %} 5813 5814 instruct vmulD_mem(vec dst, vec src, memory mem) %{ 5815 predicate((UseAVX > 0) && 5816 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5817 match(Set dst (MulVD src (LoadVector mem))); 5818 format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %} 5819 ins_encode %{ 5820 int vlen_enc = vector_length_encoding(this); 5821 __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5822 %} 5823 ins_pipe( pipe_slow ); 5824 %} 5825 5826 instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5827 predicate(Matcher::vector_length(n) == 8); 5828 match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2))); 5829 effect(TEMP dst, USE src1, USE src2); 5830 format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t" 5831 "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t" 5832 %} 5833 ins_encode %{ 5834 assert(UseAVX > 0, "required"); 5835 5836 int vlen_enc = Assembler::AVX_256bit; 5837 int cond = (Assembler::Condition)($copnd$$cmpcode); 5838 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5839 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5840 %} 5841 ins_pipe( pipe_slow ); 5842 %} 5843 5844 instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{ 5845 predicate(Matcher::vector_length(n) == 4); 5846 match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2))); 5847 effect(TEMP dst, USE src1, USE src2); 5848 format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t" 5849 "vblendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t" 5850 %} 5851 ins_encode %{ 5852 assert(UseAVX > 0, "required"); 5853 5854 int vlen_enc = Assembler::AVX_256bit; 5855 int cond = (Assembler::Condition)($copnd$$cmpcode); 5856 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vlen_enc); 5857 __ vblendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 5858 %} 5859 ins_pipe( pipe_slow ); 5860 %} 5861 5862 // --------------------------------- DIV -------------------------------------- 5863 5864 // Floats vector div 5865 instruct vdivF(vec dst, vec src) %{ 5866 predicate(UseAVX == 0); 5867 match(Set dst (DivVF dst src)); 5868 format %{ "divps $dst,$src\t! div packedF" %} 5869 ins_encode %{ 5870 __ divps($dst$$XMMRegister, $src$$XMMRegister); 5871 %} 5872 ins_pipe( pipe_slow ); 5873 %} 5874 5875 instruct vdivF_reg(vec dst, vec src1, vec src2) %{ 5876 predicate(UseAVX > 0); 5877 match(Set dst (DivVF src1 src2)); 5878 format %{ "vdivps $dst,$src1,$src2\t! div packedF" %} 5879 ins_encode %{ 5880 int vlen_enc = vector_length_encoding(this); 5881 __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5882 %} 5883 ins_pipe( pipe_slow ); 5884 %} 5885 5886 instruct vdivF_mem(vec dst, vec src, memory mem) %{ 5887 predicate((UseAVX > 0) && 5888 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5889 match(Set dst (DivVF src (LoadVector mem))); 5890 format %{ "vdivps $dst,$src,$mem\t! 
div packedF" %} 5891 ins_encode %{ 5892 int vlen_enc = vector_length_encoding(this); 5893 __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5894 %} 5895 ins_pipe( pipe_slow ); 5896 %} 5897 5898 // Doubles vector div 5899 instruct vdivD(vec dst, vec src) %{ 5900 predicate(UseAVX == 0); 5901 match(Set dst (DivVD dst src)); 5902 format %{ "divpd $dst,$src\t! div packedD" %} 5903 ins_encode %{ 5904 __ divpd($dst$$XMMRegister, $src$$XMMRegister); 5905 %} 5906 ins_pipe( pipe_slow ); 5907 %} 5908 5909 instruct vdivD_reg(vec dst, vec src1, vec src2) %{ 5910 predicate(UseAVX > 0); 5911 match(Set dst (DivVD src1 src2)); 5912 format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %} 5913 ins_encode %{ 5914 int vlen_enc = vector_length_encoding(this); 5915 __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5916 %} 5917 ins_pipe( pipe_slow ); 5918 %} 5919 5920 instruct vdivD_mem(vec dst, vec src, memory mem) %{ 5921 predicate((UseAVX > 0) && 5922 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 5923 match(Set dst (DivVD src (LoadVector mem))); 5924 format %{ "vdivpd $dst,$src,$mem\t! div packedD" %} 5925 ins_encode %{ 5926 int vlen_enc = vector_length_encoding(this); 5927 __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 5928 %} 5929 ins_pipe( pipe_slow ); 5930 %} 5931 5932 // ------------------------------ MinMax --------------------------------------- 5933 5934 // Byte, Short, Int vector Min/Max 5935 instruct minmax_reg_sse(vec dst, vec src) %{ 5936 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5937 UseAVX == 0); 5938 match(Set dst (MinV dst src)); 5939 match(Set dst (MaxV dst src)); 5940 format %{ "vector_minmax $dst,$src\t! " %} 5941 ins_encode %{ 5942 assert(UseSSE >= 4, "required"); 5943 5944 int opcode = this->ideal_Opcode(); 5945 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5946 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister); 5947 %} 5948 ins_pipe( pipe_slow ); 5949 %} 5950 5951 instruct vminmax_reg(vec dst, vec src1, vec src2) %{ 5952 predicate(is_integral_type(Matcher::vector_element_basic_type(n)) && Matcher::vector_element_basic_type(n) != T_LONG && // T_BYTE, T_SHORT, T_INT 5953 UseAVX > 0); 5954 match(Set dst (MinV src1 src2)); 5955 match(Set dst (MaxV src1 src2)); 5956 format %{ "vector_minmax $dst,$src1,$src2\t! 
" %} 5957 ins_encode %{ 5958 int opcode = this->ideal_Opcode(); 5959 int vlen_enc = vector_length_encoding(this); 5960 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5961 5962 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 5963 %} 5964 ins_pipe( pipe_slow ); 5965 %} 5966 5967 // Long vector Min/Max 5968 instruct minmaxL_reg_sse(vec dst, vec src, rxmm0 tmp) %{ 5969 predicate(Matcher::vector_length_in_bytes(n) == 16 && Matcher::vector_element_basic_type(n) == T_LONG && 5970 UseAVX == 0); 5971 match(Set dst (MinV dst src)); 5972 match(Set dst (MaxV src dst)); 5973 effect(TEMP dst, TEMP tmp); 5974 format %{ "vector_minmaxL $dst,$src\t!using $tmp as TEMP" %} 5975 ins_encode %{ 5976 assert(UseSSE >= 4, "required"); 5977 5978 int opcode = this->ideal_Opcode(); 5979 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5980 assert(elem_bt == T_LONG, "sanity"); 5981 5982 __ pminmax(opcode, elem_bt, $dst$$XMMRegister, $src$$XMMRegister, $tmp$$XMMRegister); 5983 %} 5984 ins_pipe( pipe_slow ); 5985 %} 5986 5987 instruct vminmaxL_reg_avx(legVec dst, legVec src1, legVec src2) %{ 5988 predicate(Matcher::vector_length_in_bytes(n) <= 32 && Matcher::vector_element_basic_type(n) == T_LONG && 5989 UseAVX > 0 && !VM_Version::supports_avx512vl()); 5990 match(Set dst (MinV src1 src2)); 5991 match(Set dst (MaxV src1 src2)); 5992 effect(TEMP dst); 5993 format %{ "vector_minmaxL $dst,$src1,$src2\t! " %} 5994 ins_encode %{ 5995 int vlen_enc = vector_length_encoding(this); 5996 int opcode = this->ideal_Opcode(); 5997 BasicType elem_bt = Matcher::vector_element_basic_type(this); 5998 assert(elem_bt == T_LONG, "sanity"); 5999 6000 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6001 %} 6002 ins_pipe( pipe_slow ); 6003 %} 6004 6005 instruct vminmaxL_reg_evex(vec dst, vec src1, vec src2) %{ 6006 predicate((Matcher::vector_length_in_bytes(n) == 64 || VM_Version::supports_avx512vl()) && 6007 Matcher::vector_element_basic_type(n) == T_LONG); 6008 match(Set dst (MinV src1 src2)); 6009 match(Set dst (MaxV src1 src2)); 6010 format %{ "vector_minmaxL $dst,$src1,src2\t! 
" %} 6011 ins_encode %{ 6012 assert(UseAVX > 2, "required"); 6013 6014 int vlen_enc = vector_length_encoding(this); 6015 int opcode = this->ideal_Opcode(); 6016 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6017 assert(elem_bt == T_LONG, "sanity"); 6018 6019 __ vpminmax(opcode, elem_bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6020 %} 6021 ins_pipe( pipe_slow ); 6022 %} 6023 6024 // Float/Double vector Min/Max 6025 instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, legVec btmp) %{ 6026 predicate(Matcher::vector_length_in_bytes(n) <= 32 && 6027 is_floating_point_type(Matcher::vector_element_basic_type(n)) && // T_FLOAT, T_DOUBLE 6028 UseAVX > 0); 6029 match(Set dst (MinV a b)); 6030 match(Set dst (MaxV a b)); 6031 effect(USE a, USE b, TEMP tmp, TEMP atmp, TEMP btmp); 6032 format %{ "vector_minmaxFP $dst,$a,$b\t!using $tmp, $atmp, $btmp as TEMP" %} 6033 ins_encode %{ 6034 assert(UseAVX > 0, "required"); 6035 6036 int opcode = this->ideal_Opcode(); 6037 int vlen_enc = vector_length_encoding(this); 6038 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6039 6040 __ vminmax_fp(opcode, elem_bt, 6041 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6042 $tmp$$XMMRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6043 %} 6044 ins_pipe( pipe_slow ); 6045 %} 6046 6047 instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{ 6048 predicate(Matcher::vector_length_in_bytes(n) == 64 && 6049 is_floating_point_type(Matcher::vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE 6050 match(Set dst (MinV a b)); 6051 match(Set dst (MaxV a b)); 6052 effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp); 6053 format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %} 6054 ins_encode %{ 6055 assert(UseAVX > 2, "required"); 6056 6057 int opcode = this->ideal_Opcode(); 6058 int vlen_enc = vector_length_encoding(this); 6059 BasicType elem_bt = Matcher::vector_element_basic_type(this); 6060 6061 __ evminmax_fp(opcode, elem_bt, 6062 $dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, 6063 $ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc); 6064 %} 6065 ins_pipe( pipe_slow ); 6066 %} 6067 6068 // --------------------------------- Signum/CopySign --------------------------- 6069 6070 instruct signumF_reg(regF dst, regF zero, regF one, rRegP scratch, rFlagsReg cr) %{ 6071 match(Set dst (SignumF dst (Binary zero one))); 6072 effect(TEMP scratch, KILL cr); 6073 format %{ "signumF $dst, $dst\t! using $scratch as TEMP" %} 6074 ins_encode %{ 6075 int opcode = this->ideal_Opcode(); 6076 __ signum_fp(opcode, $dst$$XMMRegister, $zero$$XMMRegister, $one$$XMMRegister, $scratch$$Register); 6077 %} 6078 ins_pipe( pipe_slow ); 6079 %} 6080 6081 instruct signumD_reg(regD dst, regD zero, regD one, rRegP scratch, rFlagsReg cr) %{ 6082 match(Set dst (SignumD dst (Binary zero one))); 6083 effect(TEMP scratch, KILL cr); 6084 format %{ "signumD $dst, $dst\t! 

#ifdef _LP64
instruct copySignF_reg(regF dst, regF src, regF tmp1, rRegI tmp2) %{
  match(Set dst (CopySignF dst src));
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignF $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ movl($tmp2$$Register, 0x7FFFFFFF);
    __ movdl($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogd($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
  match(Set dst (CopySignD dst (Binary src zero)));
  ins_cost(100);
  effect(TEMP tmp1, TEMP tmp2);
  format %{ "CopySignD $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
  ins_encode %{
    __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
    __ movq($tmp1$$XMMRegister, $tmp2$$Register);
    __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
  %}
  ins_pipe( pipe_slow );
%}
#endif // _LP64

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  ins_cost(400);
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVF (LoadVector mem)));
  ins_cost(400);
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  ins_cost(400);
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vlen_enc = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
  match(Set dst (SqrtVD (LoadVector mem)));
  ins_cost(400);
  format %{ "vsqrtpd $dst,$mem\t! 
sqrt packedD" %} 6184 ins_encode %{ 6185 assert(UseAVX > 0, "required"); 6186 int vlen_enc = vector_length_encoding(this); 6187 __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc); 6188 %} 6189 ins_pipe( pipe_slow ); 6190 %} 6191 6192 // ------------------------------ Shift --------------------------------------- 6193 6194 // Left and right shift count vectors are the same on x86 6195 // (only lowest bits of xmm reg are used for count). 6196 instruct vshiftcnt(vec dst, rRegI cnt) %{ 6197 match(Set dst (LShiftCntV cnt)); 6198 match(Set dst (RShiftCntV cnt)); 6199 format %{ "movdl $dst,$cnt\t! load shift count" %} 6200 ins_encode %{ 6201 __ movdl($dst$$XMMRegister, $cnt$$Register); 6202 %} 6203 ins_pipe( pipe_slow ); 6204 %} 6205 6206 // Byte vector shift 6207 instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6208 predicate(Matcher::vector_length(n) <= 8 && !n->as_ShiftV()->is_var_shift()); 6209 match(Set dst ( LShiftVB src shift)); 6210 match(Set dst ( RShiftVB src shift)); 6211 match(Set dst (URShiftVB src shift)); 6212 effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch); 6213 format %{"vector_byte_shift $dst,$src,$shift" %} 6214 ins_encode %{ 6215 assert(UseSSE > 3, "required"); 6216 int opcode = this->ideal_Opcode(); 6217 bool sign = (opcode != Op_URShiftVB); 6218 __ vextendbw(sign, $tmp$$XMMRegister, $src$$XMMRegister); 6219 __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister); 6220 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6221 __ pand($dst$$XMMRegister, $tmp$$XMMRegister); 6222 __ packuswb($dst$$XMMRegister, $dst$$XMMRegister); 6223 %} 6224 ins_pipe( pipe_slow ); 6225 %} 6226 6227 instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{ 6228 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6229 UseAVX <= 1); 6230 match(Set dst ( LShiftVB src shift)); 6231 match(Set dst ( RShiftVB src shift)); 6232 match(Set dst (URShiftVB src shift)); 6233 effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch); 6234 format %{"vector_byte_shift $dst,$src,$shift" %} 6235 ins_encode %{ 6236 assert(UseSSE > 3, "required"); 6237 int opcode = this->ideal_Opcode(); 6238 bool sign = (opcode != Op_URShiftVB); 6239 __ vextendbw(sign, $tmp1$$XMMRegister, $src$$XMMRegister); 6240 __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister); 6241 __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE); 6242 __ vextendbw(sign, $tmp2$$XMMRegister, $tmp2$$XMMRegister); 6243 __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister); 6244 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register); 6245 __ pand($tmp2$$XMMRegister, $dst$$XMMRegister); 6246 __ pand($dst$$XMMRegister, $tmp1$$XMMRegister); 6247 __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister); 6248 %} 6249 ins_pipe( pipe_slow ); 6250 %} 6251 6252 instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6253 predicate(Matcher::vector_length(n) == 16 && !n->as_ShiftV()->is_var_shift() && 6254 UseAVX > 1); 6255 match(Set dst ( LShiftVB src shift)); 6256 match(Set dst ( RShiftVB src shift)); 6257 match(Set dst (URShiftVB src shift)); 6258 effect(TEMP dst, TEMP tmp, TEMP scratch); 6259 format %{"vector_byte_shift $dst,$src,$shift" %} 6260 ins_encode %{ 6261 int opcode = this->ideal_Opcode(); 6262 bool sign = (opcode != Op_URShiftVB); 6263 int vlen_enc = Assembler::AVX_256bit; 6264 __ vextendbw(sign, $tmp$$XMMRegister, 
                   $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(Matcher::vector_length(n) == 32 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(sign, $tmp$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(Matcher::vector_length(n) == 64 && !n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVB src shift));
  match(Set dst ( RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    bool sign = (opcode != Op_URShiftVB);
    int vlen_enc = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(sign, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vlen_enc);
    __ vextendbw(sign, $tmp2$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vlen_enc);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vlen_enc, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// Shorts vector logical right shift produces an incorrect Java result
// for negative data, because Java code converts a short value into an int with
// sign extension before the shift. But char vectors are fine since chars are
// unsigned values.
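//
// A small worked example (illustration only, not part of the original comment):
// for a short lane s = (short)0x8000 (-32768), Java evaluates s >>> 3 on the
// sign-extended int 0xFFFF8000, giving 0x1FFFF000 (0xF000 once narrowed back to
// a short), whereas a packed 16-bit logical shift would produce 0x1000 for that
// lane. A char lane c = 0xFFFF is zero-extended instead, so 0x0000FFFF >>> 3 ==
// 0x1FFF matches the packed 16-bit result, which is why the char case is safe.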
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVS src shift));
  match(Set dst ( RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  predicate(!n->as_ShiftV()->is_var_shift());
  match(Set dst ( LShiftVI src shift));
  match(Set dst ( RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = Matcher::vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector constant shift
instruct vshiftI_imm(vec dst, vec src, immI8 shift) %{
  match(Set dst (LShiftVI src (LShiftCntV shift)));
  match(Set dst (RShiftVI src (RShiftCntV shift)));
  match(Set dst (URShiftVI src (RShiftCntV shift)));
  format %{ "vshiftd_imm $dst,$src,$shift\t! 
shift packedI" %} 6395 ins_encode %{ 6396 int opcode = this->ideal_Opcode(); 6397 if (UseAVX > 0) { 6398 int vector_len = vector_length_encoding(this); 6399 __ vshiftd_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6400 } else { 6401 int vlen = Matcher::vector_length(this); 6402 if (vlen == 2) { 6403 __ movdbl($dst$$XMMRegister, $src$$XMMRegister); 6404 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6405 } else { 6406 assert(vlen == 4, "sanity"); 6407 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6408 __ vshiftd_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6409 } 6410 } 6411 %} 6412 ins_pipe( pipe_slow ); 6413 %} 6414 6415 // Longs vector shift 6416 instruct vshiftL(vec dst, vec src, vec shift) %{ 6417 predicate(!n->as_ShiftV()->is_var_shift()); 6418 match(Set dst ( LShiftVL src shift)); 6419 match(Set dst (URShiftVL src shift)); 6420 effect(TEMP dst, USE src, USE shift); 6421 format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %} 6422 ins_encode %{ 6423 int opcode = this->ideal_Opcode(); 6424 if (UseAVX > 0) { 6425 int vlen_enc = vector_length_encoding(this); 6426 __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6427 } else { 6428 assert(Matcher::vector_length(this) == 2, ""); 6429 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6430 __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister); 6431 } 6432 %} 6433 ins_pipe( pipe_slow ); 6434 %} 6435 6436 // Longs vector constant shift 6437 instruct vshiftL_imm(vec dst, vec src, immI8 shift) %{ 6438 match(Set dst (LShiftVL src (LShiftCntV shift))); 6439 match(Set dst (URShiftVL src (RShiftCntV shift))); 6440 format %{ "vshiftq_imm $dst,$src,$shift\t! shift packedL" %} 6441 ins_encode %{ 6442 int opcode = this->ideal_Opcode(); 6443 if (UseAVX > 0) { 6444 int vector_len = vector_length_encoding(this); 6445 __ vshiftq_imm(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 6446 } else { 6447 assert(Matcher::vector_length(this) == 2, ""); 6448 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6449 __ vshiftq_imm(opcode, $dst$$XMMRegister, $shift$$constant); 6450 } 6451 %} 6452 ins_pipe( pipe_slow ); 6453 %} 6454 6455 // -------------------ArithmeticRightShift ----------------------------------- 6456 // Long vector arithmetic right shift 6457 instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{ 6458 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX <= 2); 6459 match(Set dst (RShiftVL src shift)); 6460 effect(TEMP dst, TEMP tmp, TEMP scratch); 6461 format %{ "vshiftq $dst,$src,$shift" %} 6462 ins_encode %{ 6463 uint vlen = Matcher::vector_length(this); 6464 if (vlen == 2) { 6465 assert(UseSSE >= 2, "required"); 6466 __ movdqu($dst$$XMMRegister, $src$$XMMRegister); 6467 __ psrlq($dst$$XMMRegister, $shift$$XMMRegister); 6468 __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6469 __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister); 6470 __ pxor($dst$$XMMRegister, $tmp$$XMMRegister); 6471 __ psubq($dst$$XMMRegister, $tmp$$XMMRegister); 6472 } else { 6473 assert(vlen == 4, "sanity"); 6474 assert(UseAVX > 1, "required"); 6475 int vlen_enc = Assembler::AVX_256bit; 6476 __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6477 __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register); 6478 __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6479 __ vpxor($dst$$XMMRegister, 
$dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6480 __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vlen_enc); 6481 } 6482 %} 6483 ins_pipe( pipe_slow ); 6484 %} 6485 6486 instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{ 6487 predicate(!n->as_ShiftV()->is_var_shift() && UseAVX > 2); 6488 match(Set dst (RShiftVL src shift)); 6489 format %{ "vshiftq $dst,$src,$shift" %} 6490 ins_encode %{ 6491 int vlen_enc = vector_length_encoding(this); 6492 __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6493 %} 6494 ins_pipe( pipe_slow ); 6495 %} 6496 6497 // ------------------- Variable Shift ----------------------------- 6498 // Byte variable shift 6499 instruct vshift8B_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6500 predicate(Matcher::vector_length(n) <= 8 && 6501 n->as_ShiftV()->is_var_shift() && 6502 !VM_Version::supports_avx512bw()); 6503 match(Set dst ( LShiftVB src shift)); 6504 match(Set dst ( RShiftVB src shift)); 6505 match(Set dst (URShiftVB src shift)); 6506 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6507 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp, $scratch as TEMP" %} 6508 ins_encode %{ 6509 assert(UseAVX >= 2, "required"); 6510 6511 int opcode = this->ideal_Opcode(); 6512 int vlen_enc = Assembler::AVX_128bit; 6513 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6514 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6515 %} 6516 ins_pipe( pipe_slow ); 6517 %} 6518 6519 instruct vshift16B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6520 predicate(Matcher::vector_length(n) == 16 && 6521 n->as_ShiftV()->is_var_shift() && 6522 !VM_Version::supports_avx512bw()); 6523 match(Set dst ( LShiftVB src shift)); 6524 match(Set dst ( RShiftVB src shift)); 6525 match(Set dst (URShiftVB src shift)); 6526 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6527 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6528 ins_encode %{ 6529 assert(UseAVX >= 2, "required"); 6530 6531 int opcode = this->ideal_Opcode(); 6532 int vlen_enc = Assembler::AVX_128bit; 6533 // Shift lower half and get word result in dst 6534 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6535 6536 // Shift upper half and get word result in vtmp1 6537 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6538 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6539 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6540 6541 // Merge and down convert the two word results to byte in dst 6542 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6543 %} 6544 ins_pipe( pipe_slow ); 6545 %} 6546 6547 instruct vshift32B_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, vec vtmp3, vec vtmp4, rRegP scratch) %{ 6548 predicate(Matcher::vector_length(n) == 32 && 6549 n->as_ShiftV()->is_var_shift() && 6550 !VM_Version::supports_avx512bw()); 6551 match(Set dst ( LShiftVB src shift)); 6552 match(Set dst ( RShiftVB src shift)); 6553 match(Set dst (URShiftVB src shift)); 6554 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP vtmp4, TEMP scratch); 6555 format %{ "vector_varshift_byte $dst, $src, $shift\n\t using $vtmp1, $vtmp2, $vtmp3, $vtmp4 and $scratch as TEMP" %} 6556 ins_encode %{ 6557 assert(UseAVX >= 2, "required"); 6558 6559 int opcode = this->ideal_Opcode(); 6560 int vlen_enc = Assembler::AVX_128bit; 6561 // Process lower 128 bits and get result in dst 6562 __ varshiftbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6563 __ vpshufd($vtmp1$$XMMRegister, $src$$XMMRegister, 0xE, 0); 6564 __ vpshufd($vtmp2$$XMMRegister, $shift$$XMMRegister, 0xE, 0); 6565 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6566 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0); 6567 6568 // Process higher 128 bits and get result in vtmp3 6569 __ vextracti128_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6570 __ vextracti128_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6571 __ varshiftbw(opcode, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp4$$XMMRegister, $scratch$$Register); 6572 __ vpshufd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, 0xE, 0); 6573 __ vpshufd($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, 0xE, 0); 6574 __ varshiftbw(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6575 __ vpackuswb($vtmp1$$XMMRegister, $vtmp3$$XMMRegister, $vtmp1$$XMMRegister, 0); 6576 6577 // Merge the two results in dst 6578 __ vinserti128($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6579 %} 6580 ins_pipe( pipe_slow ); 6581 %} 6582 6583 instruct vshiftB_var_evex_bw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6584 predicate(Matcher::vector_length(n) <= 32 && 6585 n->as_ShiftV()->is_var_shift() && 6586 VM_Version::supports_avx512bw()); 6587 match(Set dst ( LShiftVB src shift)); 6588 match(Set dst ( RShiftVB src shift)); 6589 match(Set dst (URShiftVB src shift)); 6590 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6591 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! 
using $vtmp, $scratch as TEMP" %} 6592 ins_encode %{ 6593 assert(UseAVX > 2, "required"); 6594 6595 int opcode = this->ideal_Opcode(); 6596 int vlen_enc = vector_length_encoding(this); 6597 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp$$XMMRegister, $scratch$$Register); 6598 %} 6599 ins_pipe( pipe_slow ); 6600 %} 6601 6602 instruct vshift64B_var_evex_bw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6603 predicate(Matcher::vector_length(n) == 64 && 6604 n->as_ShiftV()->is_var_shift() && 6605 VM_Version::supports_avx512bw()); 6606 match(Set dst ( LShiftVB src shift)); 6607 match(Set dst ( RShiftVB src shift)); 6608 match(Set dst (URShiftVB src shift)); 6609 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6610 format %{ "vector_varshift_byte $dst, $src, $shift\n\t! using $vtmp1, $vtmp2 and $scratch as TEMP" %} 6611 ins_encode %{ 6612 assert(UseAVX > 2, "required"); 6613 6614 int opcode = this->ideal_Opcode(); 6615 int vlen_enc = Assembler::AVX_256bit; 6616 __ evarshiftb(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, $vtmp1$$XMMRegister, $scratch$$Register); 6617 __ vextracti64x4_high($vtmp1$$XMMRegister, $src$$XMMRegister); 6618 __ vextracti64x4_high($vtmp2$$XMMRegister, $shift$$XMMRegister); 6619 __ evarshiftb(opcode, $vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, vlen_enc, $vtmp2$$XMMRegister, $scratch$$Register); 6620 __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, 0x1); 6621 %} 6622 ins_pipe( pipe_slow ); 6623 %} 6624 6625 // Short variable shift 6626 instruct vshift8S_var_nobw(vec dst, vec src, vec shift, vec vtmp, rRegP scratch) %{ 6627 predicate(Matcher::vector_length(n) <= 8 && 6628 n->as_ShiftV()->is_var_shift() && 6629 !VM_Version::supports_avx512bw()); 6630 match(Set dst ( LShiftVS src shift)); 6631 match(Set dst ( RShiftVS src shift)); 6632 match(Set dst (URShiftVS src shift)); 6633 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6634 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6635 ins_encode %{ 6636 assert(UseAVX >= 2, "required"); 6637 6638 int opcode = this->ideal_Opcode(); 6639 bool sign = (opcode != Op_URShiftVS); 6640 int vlen_enc = Assembler::AVX_256bit; 6641 __ vextendwd(sign, $dst$$XMMRegister, $src$$XMMRegister, 1); 6642 __ vpmovzxwd($vtmp$$XMMRegister, $shift$$XMMRegister, 1); 6643 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 6644 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6645 __ vextracti128_high($vtmp$$XMMRegister, $dst$$XMMRegister); 6646 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6647 %} 6648 ins_pipe( pipe_slow ); 6649 %} 6650 6651 instruct vshift16S_var_nobw(vec dst, vec src, vec shift, vec vtmp1, vec vtmp2, rRegP scratch) %{ 6652 predicate(Matcher::vector_length(n) == 16 && 6653 n->as_ShiftV()->is_var_shift() && 6654 !VM_Version::supports_avx512bw()); 6655 match(Set dst ( LShiftVS src shift)); 6656 match(Set dst ( RShiftVS src shift)); 6657 match(Set dst (URShiftVS src shift)); 6658 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 6659 format %{ "vector_var_shift_left_short $dst, $src, $shift\n\t" %} 6660 ins_encode %{ 6661 assert(UseAVX >= 2, "required"); 6662 6663 int opcode = this->ideal_Opcode(); 6664 bool sign = (opcode != Op_URShiftVS); 6665 int vlen_enc = Assembler::AVX_256bit; 6666 // Shift lower half, with result in vtmp2 
using vtmp1 as TEMP 6667 __ vextendwd(sign, $vtmp2$$XMMRegister, $src$$XMMRegister, vlen_enc); 6668 __ vpmovzxwd($vtmp1$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6669 __ varshiftd(opcode, $vtmp2$$XMMRegister, $vtmp2$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6670 __ vpand($vtmp2$$XMMRegister, $vtmp2$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6671 6672 // Shift upper half, with result in dst using vtmp1 as TEMP 6673 __ vextracti128_high($dst$$XMMRegister, $src$$XMMRegister); 6674 __ vextracti128_high($vtmp1$$XMMRegister, $shift$$XMMRegister); 6675 __ vextendwd(sign, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6676 __ vpmovzxwd($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6677 __ varshiftd(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, vlen_enc); 6678 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 6679 6680 // Merge lower and upper half result into dst 6681 __ vpackusdw($dst$$XMMRegister, $vtmp2$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6682 __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vlen_enc); 6683 %} 6684 ins_pipe( pipe_slow ); 6685 %} 6686 6687 instruct vshift16S_var_evex_bw(vec dst, vec src, vec shift) %{ 6688 predicate(n->as_ShiftV()->is_var_shift() && 6689 VM_Version::supports_avx512bw()); 6690 match(Set dst ( LShiftVS src shift)); 6691 match(Set dst ( RShiftVS src shift)); 6692 match(Set dst (URShiftVS src shift)); 6693 format %{ "vector_varshift_short $dst,$src,$shift\t!" %} 6694 ins_encode %{ 6695 assert(UseAVX > 2, "required"); 6696 6697 int opcode = this->ideal_Opcode(); 6698 int vlen_enc = vector_length_encoding(this); 6699 if (!VM_Version::supports_avx512vl()) { 6700 vlen_enc = Assembler::AVX_512bit; 6701 } 6702 __ varshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6703 %} 6704 ins_pipe( pipe_slow ); 6705 %} 6706 6707 //Integer variable shift 6708 instruct vshiftI_var(vec dst, vec src, vec shift) %{ 6709 predicate(n->as_ShiftV()->is_var_shift()); 6710 match(Set dst ( LShiftVI src shift)); 6711 match(Set dst ( RShiftVI src shift)); 6712 match(Set dst (URShiftVI src shift)); 6713 format %{ "vector_varshift_int $dst,$src,$shift\t!" %} 6714 ins_encode %{ 6715 assert(UseAVX >= 2, "required"); 6716 6717 int opcode = this->ideal_Opcode(); 6718 int vlen_enc = vector_length_encoding(this); 6719 __ varshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6720 %} 6721 ins_pipe( pipe_slow ); 6722 %} 6723 6724 //Long variable shift 6725 instruct vshiftL_var(vec dst, vec src, vec shift) %{ 6726 predicate(n->as_ShiftV()->is_var_shift()); 6727 match(Set dst ( LShiftVL src shift)); 6728 match(Set dst (URShiftVL src shift)); 6729 format %{ "vector_varshift_long $dst,$src,$shift\t!" %} 6730 ins_encode %{ 6731 assert(UseAVX >= 2, "required"); 6732 6733 int opcode = this->ideal_Opcode(); 6734 int vlen_enc = vector_length_encoding(this); 6735 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6736 %} 6737 ins_pipe( pipe_slow ); 6738 %} 6739 6740 //Long variable right shift arithmetic 6741 instruct vshiftL_arith_var(vec dst, vec src, vec shift, vec vtmp) %{ 6742 predicate(Matcher::vector_length(n) <= 4 && 6743 n->as_ShiftV()->is_var_shift() && 6744 UseAVX == 2); 6745 match(Set dst (RShiftVL src shift)); 6746 effect(TEMP dst, TEMP vtmp); 6747 format %{ "vector_varshift_long $dst,$src,$shift\n\t! 
using $vtmp as TEMP" %} 6748 ins_encode %{ 6749 int opcode = this->ideal_Opcode(); 6750 int vlen_enc = vector_length_encoding(this); 6751 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc, 6752 $vtmp$$XMMRegister); 6753 %} 6754 ins_pipe( pipe_slow ); 6755 %} 6756 6757 instruct vshiftL_arith_var_evex(vec dst, vec src, vec shift) %{ 6758 predicate(n->as_ShiftV()->is_var_shift() && 6759 UseAVX > 2); 6760 match(Set dst (RShiftVL src shift)); 6761 format %{ "vector_varfshift_long $dst,$src,$shift\t!" %} 6762 ins_encode %{ 6763 int opcode = this->ideal_Opcode(); 6764 int vlen_enc = vector_length_encoding(this); 6765 __ varshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc); 6766 %} 6767 ins_pipe( pipe_slow ); 6768 %} 6769 6770 // --------------------------------- AND -------------------------------------- 6771 6772 instruct vand(vec dst, vec src) %{ 6773 predicate(UseAVX == 0); 6774 match(Set dst (AndV dst src)); 6775 format %{ "pand $dst,$src\t! and vectors" %} 6776 ins_encode %{ 6777 __ pand($dst$$XMMRegister, $src$$XMMRegister); 6778 %} 6779 ins_pipe( pipe_slow ); 6780 %} 6781 6782 instruct vand_reg(vec dst, vec src1, vec src2) %{ 6783 predicate(UseAVX > 0); 6784 match(Set dst (AndV src1 src2)); 6785 format %{ "vpand $dst,$src1,$src2\t! and vectors" %} 6786 ins_encode %{ 6787 int vlen_enc = vector_length_encoding(this); 6788 __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6789 %} 6790 ins_pipe( pipe_slow ); 6791 %} 6792 6793 instruct vand_mem(vec dst, vec src, memory mem) %{ 6794 predicate((UseAVX > 0) && 6795 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6796 match(Set dst (AndV src (LoadVector mem))); 6797 format %{ "vpand $dst,$src,$mem\t! and vectors" %} 6798 ins_encode %{ 6799 int vlen_enc = vector_length_encoding(this); 6800 __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6801 %} 6802 ins_pipe( pipe_slow ); 6803 %} 6804 6805 // --------------------------------- OR --------------------------------------- 6806 6807 instruct vor(vec dst, vec src) %{ 6808 predicate(UseAVX == 0); 6809 match(Set dst (OrV dst src)); 6810 format %{ "por $dst,$src\t! or vectors" %} 6811 ins_encode %{ 6812 __ por($dst$$XMMRegister, $src$$XMMRegister); 6813 %} 6814 ins_pipe( pipe_slow ); 6815 %} 6816 6817 instruct vor_reg(vec dst, vec src1, vec src2) %{ 6818 predicate(UseAVX > 0); 6819 match(Set dst (OrV src1 src2)); 6820 format %{ "vpor $dst,$src1,$src2\t! or vectors" %} 6821 ins_encode %{ 6822 int vlen_enc = vector_length_encoding(this); 6823 __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6824 %} 6825 ins_pipe( pipe_slow ); 6826 %} 6827 6828 instruct vor_mem(vec dst, vec src, memory mem) %{ 6829 predicate((UseAVX > 0) && 6830 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6831 match(Set dst (OrV src (LoadVector mem))); 6832 format %{ "vpor $dst,$src,$mem\t! or vectors" %} 6833 ins_encode %{ 6834 int vlen_enc = vector_length_encoding(this); 6835 __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6836 %} 6837 ins_pipe( pipe_slow ); 6838 %} 6839 6840 // --------------------------------- XOR -------------------------------------- 6841 6842 instruct vxor(vec dst, vec src) %{ 6843 predicate(UseAVX == 0); 6844 match(Set dst (XorV dst src)); 6845 format %{ "pxor $dst,$src\t! 
xor vectors" %} 6846 ins_encode %{ 6847 __ pxor($dst$$XMMRegister, $src$$XMMRegister); 6848 %} 6849 ins_pipe( pipe_slow ); 6850 %} 6851 6852 instruct vxor_reg(vec dst, vec src1, vec src2) %{ 6853 predicate(UseAVX > 0); 6854 match(Set dst (XorV src1 src2)); 6855 format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %} 6856 ins_encode %{ 6857 int vlen_enc = vector_length_encoding(this); 6858 __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 6859 %} 6860 ins_pipe( pipe_slow ); 6861 %} 6862 6863 instruct vxor_mem(vec dst, vec src, memory mem) %{ 6864 predicate((UseAVX > 0) && 6865 (Matcher::vector_length_in_bytes(n->in(1)) > 8)); 6866 match(Set dst (XorV src (LoadVector mem))); 6867 format %{ "vpxor $dst,$src,$mem\t! xor vectors" %} 6868 ins_encode %{ 6869 int vlen_enc = vector_length_encoding(this); 6870 __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vlen_enc); 6871 %} 6872 ins_pipe( pipe_slow ); 6873 %} 6874 6875 // --------------------------------- VectorCast -------------------------------------- 6876 6877 instruct vcastBtoX(vec dst, vec src) %{ 6878 match(Set dst (VectorCastB2X src)); 6879 format %{ "vector_cast_b2x $dst,$src\t!" %} 6880 ins_encode %{ 6881 assert(UseAVX > 0, "required"); 6882 6883 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6884 int vlen_enc = vector_length_encoding(this); 6885 switch (to_elem_bt) { 6886 case T_SHORT: 6887 __ vpmovsxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6888 break; 6889 case T_INT: 6890 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6891 break; 6892 case T_FLOAT: 6893 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6894 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6895 break; 6896 case T_LONG: 6897 __ vpmovsxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6898 break; 6899 case T_DOUBLE: { 6900 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 6901 __ vpmovsxbd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 6902 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6903 break; 6904 } 6905 default: assert(false, "%s", type2name(to_elem_bt)); 6906 } 6907 %} 6908 ins_pipe( pipe_slow ); 6909 %} 6910 6911 instruct castStoX(vec dst, vec src, rRegP scratch) %{ 6912 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6913 Matcher::vector_length(n->in(1)) <= 8 && // src 6914 Matcher::vector_element_basic_type(n) == T_BYTE); 6915 effect(TEMP scratch); 6916 match(Set dst (VectorCastS2X src)); 6917 format %{ "vector_cast_s2x $dst,$src\t! using $scratch as TEMP" %} 6918 ins_encode %{ 6919 assert(UseAVX > 0, "required"); 6920 6921 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), 0, $scratch$$Register); 6922 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0); 6923 %} 6924 ins_pipe( pipe_slow ); 6925 %} 6926 6927 instruct vcastStoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 6928 predicate((UseAVX <= 2 || !VM_Version::supports_avx512vlbw()) && 6929 Matcher::vector_length(n->in(1)) == 16 && // src 6930 Matcher::vector_element_basic_type(n) == T_BYTE); 6931 effect(TEMP dst, TEMP vtmp, TEMP scratch); 6932 match(Set dst (VectorCastS2X src)); 6933 format %{ "vector_cast_s2x $dst,$src\t! 
using $vtmp, $scratch as TEMP" %} 6934 ins_encode %{ 6935 assert(UseAVX > 0, "required"); 6936 6937 int vlen_enc = vector_length_encoding(Matcher::vector_length_in_bytes(this, $src)); 6938 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vlen_enc, $scratch$$Register); 6939 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 6940 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0); 6941 %} 6942 ins_pipe( pipe_slow ); 6943 %} 6944 6945 instruct vcastStoX_evex(vec dst, vec src) %{ 6946 predicate((UseAVX > 2 && VM_Version::supports_avx512vlbw()) || 6947 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 6948 match(Set dst (VectorCastS2X src)); 6949 format %{ "vector_cast_s2x $dst,$src\t!" %} 6950 ins_encode %{ 6951 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6952 int src_vlen_enc = vector_length_encoding(this, $src); 6953 int vlen_enc = vector_length_encoding(this); 6954 switch (to_elem_bt) { 6955 case T_BYTE: 6956 if (!VM_Version::supports_avx512vl()) { 6957 vlen_enc = Assembler::AVX_512bit; 6958 } 6959 __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 6960 break; 6961 case T_INT: 6962 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6963 break; 6964 case T_FLOAT: 6965 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6966 __ vcvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6967 break; 6968 case T_LONG: 6969 __ vpmovsxwq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 6970 break; 6971 case T_DOUBLE: { 6972 int mid_vlen_enc = (vlen_enc == Assembler::AVX_512bit) ? Assembler::AVX_256bit : Assembler::AVX_128bit; 6973 __ vpmovsxwd($dst$$XMMRegister, $src$$XMMRegister, mid_vlen_enc); 6974 __ vcvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 6975 break; 6976 } 6977 default: 6978 ShouldNotReachHere(); 6979 } 6980 %} 6981 ins_pipe( pipe_slow ); 6982 %} 6983 6984 instruct castItoX(vec dst, vec src, rRegP scratch) %{ 6985 predicate(UseAVX <= 2 && 6986 (Matcher::vector_length_in_bytes(n->in(1)) <= 16) && 6987 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 6988 match(Set dst (VectorCastI2X src)); 6989 format %{ "vector_cast_i2x $dst,$src\t! 
using $scratch as TEMP" %} 6990 effect(TEMP scratch); 6991 ins_encode %{ 6992 assert(UseAVX > 0, "required"); 6993 6994 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 6995 int vlen_enc = vector_length_encoding(this, $src); 6996 6997 if (to_elem_bt == T_BYTE) { 6998 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 6999 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7000 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7001 } else { 7002 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7003 __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7004 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7005 } 7006 %} 7007 ins_pipe( pipe_slow ); 7008 %} 7009 7010 instruct vcastItoX(vec dst, vec src, vec vtmp, rRegP scratch) %{ 7011 predicate(UseAVX <= 2 && 7012 (Matcher::vector_length_in_bytes(n->in(1)) == 32) && 7013 (Matcher::vector_length_in_bytes(n) < Matcher::vector_length_in_bytes(n->in(1)))); // dst < src 7014 match(Set dst (VectorCastI2X src)); 7015 format %{ "vector_cast_i2x $dst,$src\t! using $vtmp and $scratch as TEMP" %} 7016 effect(TEMP dst, TEMP vtmp, TEMP scratch); 7017 ins_encode %{ 7018 assert(UseAVX > 0, "required"); 7019 7020 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7021 int vlen_enc = vector_length_encoding(this, $src); 7022 7023 if (to_elem_bt == T_BYTE) { 7024 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_byte_mask()), vlen_enc, $scratch$$Register); 7025 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7026 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7027 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7028 } else { 7029 assert(to_elem_bt == T_SHORT, "%s", type2name(to_elem_bt)); 7030 __ vpand($vtmp$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_int_to_short_mask()), vlen_enc, $scratch$$Register); 7031 __ vextracti128($dst$$XMMRegister, $vtmp$$XMMRegister, 0x1); 7032 __ vpackusdw($dst$$XMMRegister, $vtmp$$XMMRegister, $dst$$XMMRegister, vlen_enc); 7033 } 7034 %} 7035 ins_pipe( pipe_slow ); 7036 %} 7037 7038 instruct vcastItoX_evex(vec dst, vec src) %{ 7039 predicate(UseAVX > 2 || 7040 (Matcher::vector_length_in_bytes(n) >= Matcher::vector_length_in_bytes(n->in(1)))); // dst >= src 7041 match(Set dst (VectorCastI2X src)); 7042 format %{ "vector_cast_i2x $dst,$src\t!" 
%} 7043 ins_encode %{ 7044 assert(UseAVX > 0, "required"); 7045 7046 BasicType dst_elem_bt = Matcher::vector_element_basic_type(this); 7047 int src_vlen_enc = vector_length_encoding(this, $src); 7048 int dst_vlen_enc = vector_length_encoding(this); 7049 switch (dst_elem_bt) { 7050 case T_BYTE: 7051 if (!VM_Version::supports_avx512vl()) { 7052 src_vlen_enc = Assembler::AVX_512bit; 7053 } 7054 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7055 break; 7056 case T_SHORT: 7057 if (!VM_Version::supports_avx512vl()) { 7058 src_vlen_enc = Assembler::AVX_512bit; 7059 } 7060 __ evpmovdw($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 7061 break; 7062 case T_FLOAT: 7063 __ vcvtdq2ps($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7064 break; 7065 case T_LONG: 7066 __ vpmovsxdq($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7067 break; 7068 case T_DOUBLE: 7069 __ vcvtdq2pd($dst$$XMMRegister, $src$$XMMRegister, dst_vlen_enc); 7070 break; 7071 default: 7072 ShouldNotReachHere(); 7073 } 7074 %} 7075 ins_pipe( pipe_slow ); 7076 %} 7077 7078 instruct vcastLtoBS(vec dst, vec src, rRegP scratch) %{ 7079 predicate((Matcher::vector_element_basic_type(n) == T_BYTE || Matcher::vector_element_basic_type(n) == T_SHORT) && 7080 UseAVX <= 2); 7081 match(Set dst (VectorCastL2X src)); 7082 effect(TEMP scratch); 7083 format %{ "vector_cast_l2x $dst,$src\t! using $scratch as TEMP" %} 7084 ins_encode %{ 7085 assert(UseAVX > 0, "required"); 7086 7087 int vlen = Matcher::vector_length_in_bytes(this, $src); 7088 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7089 AddressLiteral mask_addr = (to_elem_bt == T_BYTE) ? ExternalAddress(vector_int_to_byte_mask()) 7090 : ExternalAddress(vector_int_to_short_mask()); 7091 if (vlen <= 16) { 7092 __ vpshufd($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_128bit); 7093 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7094 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7095 } else { 7096 assert(vlen <= 32, "required"); 7097 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, Assembler::AVX_256bit); 7098 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, Assembler::AVX_256bit); 7099 __ vpand($dst$$XMMRegister, $dst$$XMMRegister, mask_addr, Assembler::AVX_128bit, $scratch$$Register); 7100 __ vpackusdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7101 } 7102 if (to_elem_bt == T_BYTE) { 7103 __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_128bit); 7104 } 7105 %} 7106 ins_pipe( pipe_slow ); 7107 %} 7108 7109 instruct vcastLtoX_evex(vec dst, vec src) %{ 7110 predicate(UseAVX > 2 || 7111 (Matcher::vector_element_basic_type(n) == T_INT || 7112 Matcher::vector_element_basic_type(n) == T_FLOAT || 7113 Matcher::vector_element_basic_type(n) == T_DOUBLE)); 7114 match(Set dst (VectorCastL2X src)); 7115 format %{ "vector_cast_l2x $dst,$src\t!" 
%} 7116 ins_encode %{ 7117 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7118 int vlen = Matcher::vector_length_in_bytes(this, $src); 7119 int vlen_enc = vector_length_encoding(this, $src); 7120 switch (to_elem_bt) { 7121 case T_BYTE: 7122 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7123 vlen_enc = Assembler::AVX_512bit; 7124 } 7125 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7126 break; 7127 case T_SHORT: 7128 if (UseAVX > 2 && !VM_Version::supports_avx512vl()) { 7129 vlen_enc = Assembler::AVX_512bit; 7130 } 7131 __ evpmovqw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7132 break; 7133 case T_INT: 7134 if (vlen == 8) { 7135 if ($dst$$XMMRegister != $src$$XMMRegister) { 7136 __ movflt($dst$$XMMRegister, $src$$XMMRegister); 7137 } 7138 } else if (vlen == 16) { 7139 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 8); 7140 } else if (vlen == 32) { 7141 if (UseAVX > 2) { 7142 if (!VM_Version::supports_avx512vl()) { 7143 vlen_enc = Assembler::AVX_512bit; 7144 } 7145 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7146 } else { 7147 __ vpermilps($dst$$XMMRegister, $src$$XMMRegister, 8, vlen_enc); 7148 __ vpermpd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc); 7149 } 7150 } else { // vlen == 64 7151 __ evpmovqd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7152 } 7153 break; 7154 case T_FLOAT: 7155 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7156 __ evcvtqq2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7157 break; 7158 case T_DOUBLE: 7159 assert(UseAVX > 2 && VM_Version::supports_avx512dq(), "required"); 7160 __ evcvtqq2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7161 break; 7162 7163 default: assert(false, "%s", type2name(to_elem_bt)); 7164 } 7165 %} 7166 ins_pipe( pipe_slow ); 7167 %} 7168 7169 instruct vcastFtoD_reg(vec dst, vec src) %{ 7170 predicate(Matcher::vector_element_basic_type(n) == T_DOUBLE); 7171 match(Set dst (VectorCastF2X src)); 7172 format %{ "vector_cast_f2d $dst,$src\t!" %} 7173 ins_encode %{ 7174 int vlen_enc = vector_length_encoding(this); 7175 __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7176 %} 7177 ins_pipe( pipe_slow ); 7178 %} 7179 7180 7181 instruct castFtoI_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ 7182 predicate(!VM_Version::supports_avx512vl() && 7183 Matcher::vector_length_in_bytes(n) < 64 && 7184 Matcher::vector_element_basic_type(n) == T_INT); 7185 match(Set dst (VectorCastF2X src)); 7186 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); 7187 format %{ "vector_cast_f2i $dst,$src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} 7188 ins_encode %{ 7189 int vlen_enc = vector_length_encoding(this); 7190 __ vector_castF2I_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7191 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7192 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7193 %} 7194 ins_pipe( pipe_slow ); 7195 %} 7196 7197 instruct castFtoI_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7198 predicate((VM_Version::supports_avx512vl() || 7199 Matcher::vector_length_in_bytes(n) == 64) && 7200 Matcher::vector_element_basic_type(n) == T_INT); 7201 match(Set dst (VectorCastF2X src)); 7202 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7203 format %{ "vector_cast_f2i $dst,$src\t! 
using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7204 ins_encode %{ 7205 int vlen_enc = vector_length_encoding(this); 7206 __ vector_castF2I_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7207 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7208 ExternalAddress(vector_float_signflip()), $scratch$$Register, vlen_enc); 7209 %} 7210 ins_pipe( pipe_slow ); 7211 %} 7212 7213 instruct vcastDtoF_reg(vec dst, vec src) %{ 7214 predicate(Matcher::vector_element_basic_type(n) == T_FLOAT); 7215 match(Set dst (VectorCastD2X src)); 7216 format %{ "vector_cast_d2x $dst,$src\t!" %} 7217 ins_encode %{ 7218 int vlen_enc = vector_length_encoding(this, $src); 7219 __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7220 %} 7221 ins_pipe( pipe_slow ); 7222 %} 7223 7224 instruct castDtoL_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7225 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7226 match(Set dst (VectorCastD2X src)); 7227 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7228 format %{ "vector_cast_d2l $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7229 ins_encode %{ 7230 int vlen_enc = vector_length_encoding(this); 7231 __ vector_castD2L_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7232 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7233 ExternalAddress(vector_double_signflip()), $scratch$$Register, vlen_enc); 7234 %} 7235 ins_pipe( pipe_slow ); 7236 %} 7237 7238 instruct vucast(vec dst, vec src) %{ 7239 match(Set dst (VectorUCastB2X src)); 7240 match(Set dst (VectorUCastS2X src)); 7241 match(Set dst (VectorUCastI2X src)); 7242 format %{ "vector_ucast $dst,$src\t!" %} 7243 ins_encode %{ 7244 assert(UseAVX > 0, "required"); 7245 7246 BasicType from_elem_bt = Matcher::vector_element_basic_type(this, $src); 7247 BasicType to_elem_bt = Matcher::vector_element_basic_type(this); 7248 int vlen_enc = vector_length_encoding(this); 7249 __ vector_unsigned_cast($dst$$XMMRegister, $src$$XMMRegister, vlen_enc, from_elem_bt, to_elem_bt); 7250 %} 7251 ins_pipe( pipe_slow ); 7252 %} 7253 7254 #ifdef _LP64 7255 instruct vround_float_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, rRegP scratch, rFlagsReg cr) %{ 7256 predicate(!VM_Version::supports_avx512vl() && 7257 Matcher::vector_length_in_bytes(n) < 64 && 7258 Matcher::vector_element_basic_type(n) == T_INT); 7259 match(Set dst (RoundVF src)); 7260 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP scratch, KILL cr); 7261 format %{ "vector_round_float $dst,$src\t! 
using $xtmp1, $xtmp2, $xtmp3, $xtmp4 and $scratch as TEMP" %} 7262 ins_encode %{ 7263 int vlen_enc = vector_length_encoding(this); 7264 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7265 __ vector_round_float_avx($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7266 $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister, 7267 ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7268 %} 7269 ins_pipe( pipe_slow ); 7270 %} 7271 7272 instruct vround_float_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7273 predicate((VM_Version::supports_avx512vl() || 7274 Matcher::vector_length_in_bytes(n) == 64) && 7275 Matcher::vector_element_basic_type(n) == T_INT); 7276 match(Set dst (RoundVF src)); 7277 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7278 format %{ "vector_round_float $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7279 ins_encode %{ 7280 int vlen_enc = vector_length_encoding(this); 7281 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7282 __ vector_round_float_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7283 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7284 ExternalAddress(vector_float_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7285 %} 7286 ins_pipe( pipe_slow ); 7287 %} 7288 7289 instruct vround_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rRegP scratch, rFlagsReg cr) %{ 7290 predicate(Matcher::vector_element_basic_type(n) == T_LONG); 7291 match(Set dst (RoundVD src)); 7292 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, TEMP scratch, KILL cr); 7293 format %{ "vector_round_long $dst,$src\t! using $xtmp1, $xtmp2, $ktmp1, $ktmp2 and $scratch as TEMP" %} 7294 ins_encode %{ 7295 int vlen_enc = vector_length_encoding(this); 7296 InternalAddress new_mxcsr = $constantaddress((jint)0x3F80); 7297 __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, 7298 $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister, 7299 ExternalAddress(vector_double_signflip()), new_mxcsr, $scratch$$Register, vlen_enc); 7300 %} 7301 ins_pipe( pipe_slow ); 7302 %} 7303 #endif 7304 // --------------------------------- VectorMaskCmp -------------------------------------- 7305 7306 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7307 predicate(n->bottom_type()->isa_vectmask() == NULL && 7308 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1 7309 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7310 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7311 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7312 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
%} 7313 ins_encode %{ 7314 int vlen_enc = vector_length_encoding(this, $src1); 7315 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7316 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7317 __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7318 } else { 7319 __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7320 } 7321 %} 7322 ins_pipe( pipe_slow ); 7323 %} 7324 7325 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7326 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1 7327 n->bottom_type()->isa_vectmask() == NULL && 7328 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7329 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7330 effect(TEMP scratch, TEMP ktmp); 7331 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7332 ins_encode %{ 7333 int vlen_enc = Assembler::AVX_512bit; 7334 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7335 KRegister mask = k0; // The comparison itself is not being masked. 7336 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7337 __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7338 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7339 } else { 7340 __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7341 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register); 7342 } 7343 %} 7344 ins_pipe( pipe_slow ); 7345 %} 7346 7347 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{ 7348 predicate(n->bottom_type()->isa_vectmask() && 7349 is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE 7350 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7351 format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %} 7352 ins_encode %{ 7353 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 7354 int vlen_enc = vector_length_encoding(this, $src1); 7355 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 7356 KRegister mask = k0; // The comparison itself is not being masked. 7357 if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) { 7358 __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7359 } else { 7360 __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 7361 } 7362 %} 7363 ins_pipe( pipe_slow ); 7364 %} 7365 7366 instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{ 7367 predicate(n->bottom_type()->isa_vectmask() == NULL && 7368 !is_unsigned_booltest_pred(n->in(2)->get_int()) && 7369 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1 7370 Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1 7371 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) && 7372 (n->in(2)->get_int() == BoolTest::eq || 7373 n->in(2)->get_int() == BoolTest::lt || 7374 n->in(2)->get_int() == BoolTest::gt)); // cond 7375 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7376 format %{ "vector_compare $dst,$src1,$src2,$cond\t!" 
" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&  // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t!
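  // The encoding below implements an unsigned comparison with signed compare hardware by flipping the
  // sign bit of both operands (XOR with high_bit_set): a <u b  iff  (a ^ MIN_VALUE) <s (b ^ MIN_VALUE).
  // Worked byte example: a = 0xFF (255 unsigned), b = 0x01; after the flip a becomes 0x7F (127) and
  // b becomes 0x81 (-127), so the signed compare still reports a > b, matching the unsigned order.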
using $xtmp as TEMP" %} 7416 ins_encode %{ 7417 InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1))); 7418 int vlen_enc = vector_length_encoding(this, $src1); 7419 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7420 Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1)); 7421 7422 if (vlen_enc == Assembler::AVX_128bit) { 7423 __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7424 } else { 7425 __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg); 7426 } 7427 __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc); 7428 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc); 7429 __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc); 7430 %} 7431 ins_pipe( pipe_slow ); 7432 %} 7433 7434 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{ 7435 predicate((n->bottom_type()->isa_vectmask() == NULL && 7436 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1 7437 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7438 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7439 effect(TEMP scratch, TEMP ktmp); 7440 format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %} 7441 ins_encode %{ 7442 assert(UseAVX > 2, "required"); 7443 7444 int vlen_enc = vector_length_encoding(this, $src1); 7445 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 7446 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 7447 KRegister mask = k0; // The comparison itself is not being masked. 7448 bool merge = false; 7449 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 7450 7451 switch (src1_elem_bt) { 7452 case T_INT: { 7453 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7454 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7455 break; 7456 } 7457 case T_LONG: { 7458 __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 7459 __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register); 7460 break; 7461 } 7462 default: assert(false, "%s", type2name(src1_elem_bt)); 7463 } 7464 %} 7465 ins_pipe( pipe_slow ); 7466 %} 7467 7468 7469 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{ 7470 predicate(n->bottom_type()->isa_vectmask() && 7471 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 7472 match(Set dst (VectorMaskCmp (Binary src1 src2) cond)); 7473 format %{ "vector_compared_evex $dst,$src1,$src2,$cond\t!" 
" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");

    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
    BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);

    // Comparison is selected according to the element type of src1.
    switch (src1_elem_bt) {
      case T_BYTE: {
        __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_SHORT: {
        __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_INT: {
        __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      case T_LONG: {
        __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
        break;
      }
      default: assert(false, "%s", type2name(src1_elem_bt));
    }
  %}
  ins_pipe( pipe_slow );
%}

// Extract

instruct extractI(rRegI dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  format %{ "extractI $dst,$src,$idx\t!" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    __ get_elem(elem_bt, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractI(rRegI dst, legVec src, immI idx, legVec vtmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)) == 32 || // src
            Matcher::vector_length_in_bytes(n->in(1)) == 64);  // src
  match(Set dst (ExtractI src idx));
  match(Set dst (ExtractS src idx));
#ifdef _LP64
  match(Set dst (ExtractB src idx));
#endif
  effect(TEMP vtmp);
  format %{ "vextractI $dst,$src,$idx\t! using $vtmp as TEMP" %}
  ins_encode %{
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
    XMMRegister lane_xmm = __ get_lane(elem_bt, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant);
    __ get_elem(elem_bt, $dst$$Register, lane_xmm, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

#ifdef _LP64
instruct extractL(rRegL dst, legVec src, immU8 idx) %{
  predicate(Matcher::vector_length(n->in(1)) <= 2); // src
  match(Set dst (ExtractL src idx));
  format %{ "extractL $dst,$src,$idx\t!" %}
  ins_encode %{
    assert(UseSSE >= 4, "required");
    assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

    __ get_elem(T_LONG, $dst$$Register, $src$$XMMRegister, $idx$$constant);
  %}
  ins_pipe( pipe_slow );
%}

instruct vextractL(rRegL dst, legVec src, immU8 idx, legVec vtmp) %{
  predicate(Matcher::vector_length(n->in(1)) == 4 || // src
            Matcher::vector_length(n->in(1)) == 8);  // src
  match(Set dst (ExtractL src idx));
  effect(TEMP vtmp);
  format %{ "vextractL $dst,$src,$idx\t!
using $vtmp as TEMP" %} 7566 ins_encode %{ 7567 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7568 7569 XMMRegister lane_reg = __ get_lane(T_LONG, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7570 __ get_elem(T_LONG, $dst$$Register, lane_reg, $idx$$constant); 7571 %} 7572 ins_pipe( pipe_slow ); 7573 %} 7574 #endif 7575 7576 instruct extractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7577 predicate(Matcher::vector_length(n->in(1)) <= 4); 7578 match(Set dst (ExtractF src idx)); 7579 effect(TEMP dst, TEMP tmp, TEMP vtmp); 7580 format %{ "extractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7581 ins_encode %{ 7582 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7583 7584 __ get_elem(T_FLOAT, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant, $tmp$$Register, $vtmp$$XMMRegister); 7585 %} 7586 ins_pipe( pipe_slow ); 7587 %} 7588 7589 instruct vextractF(legRegF dst, legVec src, immU8 idx, rRegI tmp, legVec vtmp) %{ 7590 predicate(Matcher::vector_length(n->in(1)/*src*/) == 8 || 7591 Matcher::vector_length(n->in(1)/*src*/) == 16); 7592 match(Set dst (ExtractF src idx)); 7593 effect(TEMP tmp, TEMP vtmp); 7594 format %{ "vextractF $dst,$src,$idx\t! using $tmp, $vtmp as TEMP" %} 7595 ins_encode %{ 7596 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7597 7598 XMMRegister lane_reg = __ get_lane(T_FLOAT, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7599 __ get_elem(T_FLOAT, $dst$$XMMRegister, lane_reg, $idx$$constant, $tmp$$Register); 7600 %} 7601 ins_pipe( pipe_slow ); 7602 %} 7603 7604 instruct extractD(legRegD dst, legVec src, immU8 idx) %{ 7605 predicate(Matcher::vector_length(n->in(1)) == 2); // src 7606 match(Set dst (ExtractD src idx)); 7607 format %{ "extractD $dst,$src,$idx\t!" %} 7608 ins_encode %{ 7609 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7610 7611 __ get_elem(T_DOUBLE, $dst$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7612 %} 7613 ins_pipe( pipe_slow ); 7614 %} 7615 7616 instruct vextractD(legRegD dst, legVec src, immU8 idx, legVec vtmp) %{ 7617 predicate(Matcher::vector_length(n->in(1)) == 4 || // src 7618 Matcher::vector_length(n->in(1)) == 8); // src 7619 match(Set dst (ExtractD src idx)); 7620 effect(TEMP vtmp); 7621 format %{ "vextractD $dst,$src,$idx\t! using $vtmp as TEMP" %} 7622 ins_encode %{ 7623 assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds"); 7624 7625 XMMRegister lane_reg = __ get_lane(T_DOUBLE, $vtmp$$XMMRegister, $src$$XMMRegister, $idx$$constant); 7626 __ get_elem(T_DOUBLE, $dst$$XMMRegister, lane_reg, $idx$$constant); 7627 %} 7628 ins_pipe( pipe_slow ); 7629 %} 7630 7631 // --------------------------------- Vector Blend -------------------------------------- 7632 7633 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{ 7634 predicate(UseAVX == 0); 7635 match(Set dst (VectorBlend (Binary dst src) mask)); 7636 format %{ "vector_blend $dst,$src,$mask\t! 
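  // SSE4.1 pblendvb selects each byte according to the high bit of the corresponding byte of an
  // implicit xmm0 operand, which is why this rule pins an rxmm0 temp and copies $mask into it
  // before blending.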
using $tmp as TEMP" %} 7637 effect(TEMP tmp); 7638 ins_encode %{ 7639 assert(UseSSE >= 4, "required"); 7640 7641 if ($mask$$XMMRegister != $tmp$$XMMRegister) { 7642 __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister); 7643 } 7644 __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask 7645 %} 7646 ins_pipe( pipe_slow ); 7647 %} 7648 7649 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7650 predicate(UseAVX > 0 && 7651 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7652 Matcher::vector_length_in_bytes(n) <= 32 && 7653 is_integral_type(Matcher::vector_element_basic_type(n))); 7654 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7655 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7656 ins_encode %{ 7657 int vlen_enc = vector_length_encoding(this); 7658 __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7659 %} 7660 ins_pipe( pipe_slow ); 7661 %} 7662 7663 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{ 7664 predicate(UseAVX > 0 && 7665 n->in(2)->bottom_type()->isa_vectmask() == NULL && 7666 Matcher::vector_length_in_bytes(n) <= 32 && 7667 !is_integral_type(Matcher::vector_element_basic_type(n))); 7668 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7669 format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %} 7670 ins_encode %{ 7671 int vlen_enc = vector_length_encoding(this); 7672 __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc); 7673 %} 7674 ins_pipe( pipe_slow ); 7675 %} 7676 7677 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{ 7678 predicate(Matcher::vector_length_in_bytes(n) == 64 && 7679 n->in(2)->bottom_type()->isa_vectmask() == NULL); 7680 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7681 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %} 7682 effect(TEMP scratch, TEMP ktmp); 7683 ins_encode %{ 7684 int vlen_enc = Assembler::AVX_512bit; 7685 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7686 __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register); 7687 __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7688 %} 7689 ins_pipe( pipe_slow ); 7690 %} 7691 7692 7693 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{ 7694 predicate(n->in(2)->bottom_type()->isa_vectmask() && 7695 (!is_subword_type(Matcher::vector_element_basic_type(n)) || 7696 VM_Version::supports_avx512bw())); 7697 match(Set dst (VectorBlend (Binary src1 src2) mask)); 7698 format %{ "vector_blend $dst,$src1,$src2,$mask\t! 
using $scratch and k2 as TEMP" %} 7699 effect(TEMP scratch); 7700 ins_encode %{ 7701 int vlen_enc = vector_length_encoding(this); 7702 BasicType elem_bt = Matcher::vector_element_basic_type(this); 7703 __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 7704 %} 7705 ins_pipe( pipe_slow ); 7706 %} 7707 7708 // --------------------------------- ABS -------------------------------------- 7709 // a = |a| 7710 instruct vabsB_reg(vec dst, vec src) %{ 7711 match(Set dst (AbsVB src)); 7712 ins_cost(450); 7713 format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %} 7714 ins_encode %{ 7715 uint vlen = Matcher::vector_length(this); 7716 if (vlen <= 16) { 7717 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 7718 } else { 7719 int vlen_enc = vector_length_encoding(this); 7720 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7721 } 7722 %} 7723 ins_pipe( pipe_slow ); 7724 %} 7725 7726 instruct vabsS_reg(vec dst, vec src) %{ 7727 match(Set dst (AbsVS src)); 7728 ins_cost(450); 7729 format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %} 7730 ins_encode %{ 7731 uint vlen = Matcher::vector_length(this); 7732 if (vlen <= 8) { 7733 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 7734 } else { 7735 int vlen_enc = vector_length_encoding(this); 7736 __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7737 } 7738 %} 7739 ins_pipe( pipe_slow ); 7740 %} 7741 7742 instruct vabsI_reg(vec dst, vec src) %{ 7743 match(Set dst (AbsVI src)); 7744 format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %} 7745 ins_cost(250); 7746 ins_encode %{ 7747 uint vlen = Matcher::vector_length(this); 7748 if (vlen <= 4) { 7749 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 7750 } else { 7751 int vlen_enc = vector_length_encoding(this); 7752 __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7753 } 7754 %} 7755 ins_pipe( pipe_slow ); 7756 %} 7757 7758 instruct vabsL_reg(vec dst, vec src) %{ 7759 match(Set dst (AbsVL src)); 7760 ins_cost(450); 7761 format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %} 7762 ins_encode %{ 7763 assert(UseAVX > 2, "required"); 7764 int vlen_enc = vector_length_encoding(this); 7765 if (!VM_Version::supports_avx512vl()) { 7766 vlen_enc = Assembler::AVX_512bit; 7767 } 7768 __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 7769 %} 7770 ins_pipe( pipe_slow ); 7771 %} 7772 7773 // --------------------------------- ABSNEG -------------------------------------- 7774 7775 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{ 7776 predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F 7777 match(Set dst (AbsVF src)); 7778 match(Set dst (NegVF src)); 7779 effect(TEMP scratch); 7780 format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %} 7781 ins_cost(150); 7782 ins_encode %{ 7783 int opcode = this->ideal_Opcode(); 7784 int vlen = Matcher::vector_length(this); 7785 if (vlen == 2) { 7786 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7787 } else { 7788 assert(vlen == 8 || vlen == 16, "required"); 7789 int vlen_enc = vector_length_encoding(this); 7790 __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7791 } 7792 %} 7793 ins_pipe( pipe_slow ); 7794 %} 7795 7796 instruct vabsneg4F(vec dst, rRegI scratch) %{ 7797 predicate(Matcher::vector_length(n) == 4); 7798 match(Set dst (AbsVF dst)); 7799 match(Set dst (NegVF dst)); 7800 effect(TEMP scratch); 7801 format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" 
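  // A hedged note on the vabsneg rules here: the vabsnegf/vabsnegd macros presumably implement AbsV
  // as an AND with a sign-clearing mask (0x7FFFFFFF per float / 0x7FFF...FFFF per double) and NegV as
  // an XOR with just the sign bit; the [mask] shown in the format is that constant, addressed through
  // $scratch.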
%} 7802 ins_cost(150); 7803 ins_encode %{ 7804 int opcode = this->ideal_Opcode(); 7805 __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register); 7806 %} 7807 ins_pipe( pipe_slow ); 7808 %} 7809 7810 instruct vabsnegD(vec dst, vec src, rRegI scratch) %{ 7811 match(Set dst (AbsVD src)); 7812 match(Set dst (NegVD src)); 7813 effect(TEMP scratch); 7814 format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %} 7815 ins_encode %{ 7816 int opcode = this->ideal_Opcode(); 7817 uint vlen = Matcher::vector_length(this); 7818 if (vlen == 2) { 7819 assert(UseSSE >= 2, "required"); 7820 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register); 7821 } else { 7822 int vlen_enc = vector_length_encoding(this); 7823 __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register); 7824 } 7825 %} 7826 ins_pipe( pipe_slow ); 7827 %} 7828 7829 //------------------------------------- VectorTest -------------------------------------------- 7830 7831 #ifdef _LP64 7832 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{ 7833 predicate(!VM_Version::supports_avx512bwdq() && 7834 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7835 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7836 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7837 match(Set dst (VectorTest src1 src2 )); 7838 effect(TEMP vtmp1, TEMP vtmp2, KILL cr); 7839 format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %} 7840 ins_encode %{ 7841 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7842 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister); 7843 __ setb(Assembler::carrySet, $dst$$Register); 7844 __ movzbl($dst$$Register, $dst$$Register); 7845 %} 7846 ins_pipe( pipe_slow ); 7847 %} 7848 7849 instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7850 predicate(!VM_Version::supports_avx512bwdq() && 7851 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7852 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7853 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow); 7854 match(Set dst (VectorTest src1 src2 )); 7855 effect(KILL cr); 7856 format %{ "vptest_alltrue_ge16 $dst,$src1, $src2\t! using $cr as TEMP" %} 7857 ins_encode %{ 7858 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7859 __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7860 __ setb(Assembler::carrySet, $dst$$Register); 7861 __ movzbl($dst$$Register, $dst$$Register); 7862 %} 7863 ins_pipe( pipe_slow ); 7864 %} 7865 7866 instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{ 7867 predicate(VM_Version::supports_avx512bwdq() && 7868 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7869 n->in(1)->bottom_type()->isa_vectmask() && 7870 Matcher::vector_length(n->in(1)) < 8); 7871 match(Set dst (VectorTest src1 src2)); 7872 effect(KILL cr, TEMP kscratch); 7873 format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! 
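  // A hedged note on the vptest_alltrue_lt16/ge16 rules above: BoolTest::overflow is mapped onto the
  // carry flag of (v)ptest, which is set when (second operand AND NOT first operand) == 0. With the
  // all-ones comparand that the intrinsic presumably supplies as src2, CF set means every lane of the
  // mask is set, and setb(carrySet) turns that into the 0/1 result.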
using $cr as TEMP" %} 7874 ins_encode %{ 7875 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7876 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7877 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7878 uint masklen = Matcher::vector_length(this, $src1); 7879 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister); 7880 %} 7881 ins_pipe( pipe_slow ); 7882 %} 7883 7884 7885 instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 7886 predicate(VM_Version::supports_avx512bwdq() && 7887 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow && 7888 n->in(1)->bottom_type()->isa_vectmask() && 7889 Matcher::vector_length(n->in(1)) >= 8); 7890 match(Set dst (VectorTest src1 src2)); 7891 effect(KILL cr); 7892 format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %} 7893 ins_encode %{ 7894 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7895 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7896 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7897 uint masklen = Matcher::vector_length(this, $src1); 7898 __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg); 7899 %} 7900 ins_pipe( pipe_slow ); 7901 %} 7902 7903 7904 instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{ 7905 predicate(!VM_Version::supports_avx512bwdq() && 7906 Matcher::vector_length_in_bytes(n->in(1)) >= 4 && 7907 Matcher::vector_length_in_bytes(n->in(1)) < 16 && 7908 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7909 match(Set dst (VectorTest src1 src2 )); 7910 effect(TEMP vtmp, KILL cr); 7911 format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %} 7912 ins_encode %{ 7913 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7914 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7915 __ setb(Assembler::notZero, $dst$$Register); 7916 __ movzbl($dst$$Register, $dst$$Register); 7917 %} 7918 ins_pipe( pipe_slow ); 7919 %} 7920 7921 instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{ 7922 predicate(!VM_Version::supports_avx512bwdq() && 7923 Matcher::vector_length_in_bytes(n->in(1)) >= 16 && 7924 Matcher::vector_length_in_bytes(n->in(1)) < 64 && 7925 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7926 match(Set dst (VectorTest src1 src2 )); 7927 effect(KILL cr); 7928 format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %} 7929 ins_encode %{ 7930 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7931 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7932 __ setb(Assembler::notZero, $dst$$Register); 7933 __ movzbl($dst$$Register, $dst$$Register); 7934 %} 7935 ins_pipe( pipe_slow ); 7936 %} 7937 7938 instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{ 7939 predicate(VM_Version::supports_avx512bwdq() && 7940 static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne); 7941 match(Set dst (VectorTest src1 src2)); 7942 effect(KILL cr); 7943 format %{ "vptest_anytrue_lt8_evex $dst,$src1,$src2\t! 
using $cr as TEMP" %} 7944 ins_encode %{ 7945 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7946 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7947 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7948 uint masklen = Matcher::vector_length(this, $src1); 7949 __ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister); 7950 %} 7951 ins_pipe( pipe_slow ); 7952 %} 7953 7954 instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{ 7955 predicate(!VM_Version::supports_avx512bwdq() && 7956 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && 7957 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 && 7958 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7959 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7960 effect(TEMP vtmp); 7961 format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %} 7962 ins_encode %{ 7963 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7964 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister); 7965 %} 7966 ins_pipe( pipe_slow ); 7967 %} 7968 7969 instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{ 7970 predicate(!VM_Version::supports_avx512bwdq() && 7971 Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 && 7972 Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 64 && 7973 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7974 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7975 format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %} 7976 ins_encode %{ 7977 int vlen = Matcher::vector_length_in_bytes(this, $src1); 7978 __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg); 7979 %} 7980 ins_pipe( pipe_slow ); 7981 %} 7982 7983 instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{ 7984 predicate(VM_Version::supports_avx512bwdq() && 7985 static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne); 7986 match(Set cr (CmpI (VectorTest src1 src2) zero)); 7987 format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %} 7988 ins_encode %{ 7989 uint masklen = Matcher::vector_length(this, $src1); 7990 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 7991 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 7992 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 7993 masklen = masklen < 8 ? 
8 : masklen; 7994 __ ktest(masklen, $src1$$KRegister, $src2$$KRegister); 7995 %} 7996 ins_pipe( pipe_slow ); 7997 %} 7998 #endif 7999 8000 //------------------------------------- LoadMask -------------------------------------------- 8001 8002 instruct loadMask(legVec dst, legVec src) %{ 8003 predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw()); 8004 match(Set dst (VectorLoadMask src)); 8005 effect(TEMP dst); 8006 format %{ "vector_loadmask_byte $dst, $src\n\t" %} 8007 ins_encode %{ 8008 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8009 BasicType elem_bt = Matcher::vector_element_basic_type(this); 8010 __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true); 8011 %} 8012 ins_pipe( pipe_slow ); 8013 %} 8014 8015 instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{ 8016 predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8017 match(Set dst (VectorLoadMask src)); 8018 effect(TEMP xtmp, TEMP tmp); 8019 format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %} 8020 ins_encode %{ 8021 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8022 $tmp$$Register, true, Assembler::AVX_512bit); 8023 %} 8024 ins_pipe( pipe_slow ); 8025 %} 8026 8027 instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{ 8028 predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8029 match(Set dst (VectorLoadMask src)); 8030 effect(TEMP xtmp); 8031 format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %} 8032 ins_encode %{ 8033 int vlen_enc = vector_length_encoding(in(1)); 8034 __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister, 8035 noreg, false, vlen_enc); 8036 %} 8037 ins_pipe( pipe_slow ); 8038 %} 8039 8040 //------------------------------------- StoreMask -------------------------------------------- 8041 8042 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{ 8043 predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8044 match(Set dst (VectorStoreMask src size)); 8045 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8046 ins_encode %{ 8047 int vlen = Matcher::vector_length(this); 8048 if (vlen <= 16 && UseAVX <= 2) { 8049 assert(UseSSE >= 3, "required"); 8050 __ pabsb($dst$$XMMRegister, $src$$XMMRegister); 8051 } else { 8052 assert(UseAVX > 0, "required"); 8053 int src_vlen_enc = vector_length_encoding(this, $src); 8054 __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8055 } 8056 %} 8057 ins_pipe( pipe_slow ); 8058 %} 8059 8060 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{ 8061 predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8062 match(Set dst (VectorStoreMask src size)); 8063 effect(TEMP_DEF dst, TEMP xtmp); 8064 format %{ "vector_store_mask $dst, $src \t! 
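  // VectorStoreMask narrows a lane mask of -1/0 values down to bytes holding 1/0. The repeated
  // pabsb/vpabsb in these rules performs the final normalization: after packing, a true lane is the
  // byte 0xFF and |0xFF| == 0x01, while a false lane stays 0x00.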
elem size is $size byte[s]" %} 8065 ins_encode %{ 8066 int vlen_enc = Assembler::AVX_128bit; 8067 int vlen = Matcher::vector_length(this); 8068 if (vlen <= 8) { 8069 assert(UseSSE >= 3, "required"); 8070 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8071 __ pabsw($dst$$XMMRegister, $src$$XMMRegister); 8072 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8073 } else { 8074 assert(UseAVX > 0, "required"); 8075 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8076 __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8077 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8078 } 8079 %} 8080 ins_pipe( pipe_slow ); 8081 %} 8082 8083 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{ 8084 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8085 match(Set dst (VectorStoreMask src size)); 8086 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8087 effect(TEMP_DEF dst, TEMP xtmp); 8088 ins_encode %{ 8089 int vlen_enc = Assembler::AVX_128bit; 8090 int vlen = Matcher::vector_length(this); 8091 if (vlen <= 4) { 8092 assert(UseSSE >= 3, "required"); 8093 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8094 __ pabsd($dst$$XMMRegister, $src$$XMMRegister); 8095 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8096 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8097 } else { 8098 assert(UseAVX > 0, "required"); 8099 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8100 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1); 8101 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8102 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc); 8103 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8104 } 8105 %} 8106 ins_pipe( pipe_slow ); 8107 %} 8108 8109 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{ 8110 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2); 8111 match(Set dst (VectorStoreMask src size)); 8112 effect(TEMP_DEF dst, TEMP xtmp); 8113 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8114 ins_encode %{ 8115 assert(UseSSE >= 3, "required"); 8116 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister); 8117 __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8); 8118 __ pabsd($dst$$XMMRegister, $dst$$XMMRegister); 8119 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister); 8120 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister); 8121 %} 8122 ins_pipe( pipe_slow ); 8123 %} 8124 8125 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{ 8126 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4); 8127 match(Set dst (VectorStoreMask src size)); 8128 format %{ "vector_store_mask $dst, $src \t! 
elem size is $size byte[s], using $vtmp as TEMP" %} 8129 effect(TEMP_DEF dst, TEMP vtmp); 8130 ins_encode %{ 8131 int vlen_enc = Assembler::AVX_128bit; 8132 __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit); 8133 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1); 8134 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc); 8135 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8136 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8137 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8138 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); 8139 %} 8140 ins_pipe( pipe_slow ); 8141 %} 8142 8143 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{ 8144 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8145 match(Set dst (VectorStoreMask src size)); 8146 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8147 ins_encode %{ 8148 int src_vlen_enc = vector_length_encoding(this, $src); 8149 int dst_vlen_enc = vector_length_encoding(this); 8150 if (!VM_Version::supports_avx512vl()) { 8151 src_vlen_enc = Assembler::AVX_512bit; 8152 } 8153 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8154 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8155 %} 8156 ins_pipe( pipe_slow ); 8157 %} 8158 8159 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{ 8160 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL); 8161 match(Set dst (VectorStoreMask src size)); 8162 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %} 8163 ins_encode %{ 8164 int src_vlen_enc = vector_length_encoding(this, $src); 8165 int dst_vlen_enc = vector_length_encoding(this); 8166 if (!VM_Version::supports_avx512vl()) { 8167 src_vlen_enc = Assembler::AVX_512bit; 8168 } 8169 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc); 8170 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8171 %} 8172 ins_pipe( pipe_slow ); 8173 %} 8174 8175 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{ 8176 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw()); 8177 match(Set dst (VectorStoreMask mask size)); 8178 effect(TEMP_DEF dst, TEMP tmp); 8179 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %} 8180 ins_encode %{ 8181 assert(Matcher::vector_length_in_bytes(this, $mask) == 64, ""); 8182 __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()), 8183 false, Assembler::AVX_512bit, $tmp$$Register); 8184 __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit); 8185 %} 8186 ins_pipe( pipe_slow ); 8187 %} 8188 8189 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{ 8190 predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw()); 8191 match(Set dst (VectorStoreMask mask size)); 8192 effect(TEMP_DEF dst); 8193 format %{ "vector_store_mask $dst, $mask \t! 
elem size is $size byte[s]" %} 8194 ins_encode %{ 8195 int dst_vlen_enc = vector_length_encoding(this); 8196 __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc); 8197 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc); 8198 %} 8199 ins_pipe( pipe_slow ); 8200 %} 8201 8202 instruct vmaskcast_evex(kReg dst) %{ 8203 predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); 8204 match(Set dst (VectorMaskCast dst)); 8205 ins_cost(0); 8206 format %{ "vector_mask_cast $dst" %} 8207 ins_encode %{ 8208 // empty 8209 %} 8210 ins_pipe(empty); 8211 %} 8212 8213 instruct vmaskcast(vec dst) %{ 8214 predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) && 8215 (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1)))); 8216 match(Set dst (VectorMaskCast dst)); 8217 ins_cost(0); 8218 format %{ "vector_mask_cast $dst" %} 8219 ins_encode %{ 8220 // empty 8221 %} 8222 ins_pipe(empty); 8223 %} 8224 8225 //-------------------------------- Load Iota Indices ---------------------------------- 8226 8227 instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{ 8228 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8229 match(Set dst (VectorLoadConst src)); 8230 effect(TEMP scratch); 8231 format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %} 8232 ins_encode %{ 8233 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8234 __ load_iota_indices($dst$$XMMRegister, $scratch$$Register, vlen_in_bytes); 8235 %} 8236 ins_pipe( pipe_slow ); 8237 %} 8238 8239 //-------------------------------- Rearrange ---------------------------------- 8240 8241 // LoadShuffle/Rearrange for Byte 8242 8243 instruct loadShuffleB(vec dst) %{ 8244 predicate(Matcher::vector_element_basic_type(n) == T_BYTE); 8245 match(Set dst (VectorLoadShuffle dst)); 8246 format %{ "vector_load_shuffle $dst, $dst" %} 8247 ins_encode %{ 8248 // empty 8249 %} 8250 ins_pipe( pipe_slow ); 8251 %} 8252 8253 instruct rearrangeB(vec dst, vec shuffle) %{ 8254 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8255 Matcher::vector_length(n) < 32); 8256 match(Set dst (VectorRearrange dst shuffle)); 8257 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8258 ins_encode %{ 8259 assert(UseSSE >= 4, "required"); 8260 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8261 %} 8262 ins_pipe( pipe_slow ); 8263 %} 8264 8265 instruct rearrangeB_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8266 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8267 Matcher::vector_length(n) == 32 && !VM_Version::supports_avx512_vbmi()); 8268 match(Set dst (VectorRearrange src shuffle)); 8269 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8270 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
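  // A hedged sketch of the trick used below: 256-bit vpshufb only permutes within each 128-bit lane,
  // so the rule shuffles both the original source and a lane-swapped copy and then blends them.
  // Adding vector_byte_shufflemask() (presumably 0x70 in every byte) to the indices leaves bit 7
  // clear for indices 0..15 and sets it for indices 16..31, so vpblendvb picks the in-lane or
  // cross-lane result per byte accordingly.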
using $vtmp1, $vtmp2, $scratch as TEMP" %} 8271 ins_encode %{ 8272 assert(UseAVX >= 2, "required"); 8273 // Swap src into vtmp1 8274 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8275 // Shuffle swapped src to get entries from other 128 bit lane 8276 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8277 // Shuffle original src to get entries from self 128 bit lane 8278 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8279 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8280 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); 8281 // Perform the blend 8282 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8283 %} 8284 ins_pipe( pipe_slow ); 8285 %} 8286 8287 instruct rearrangeB_evex(vec dst, vec src, vec shuffle) %{ 8288 predicate(Matcher::vector_element_basic_type(n) == T_BYTE && 8289 Matcher::vector_length(n) >= 32 && VM_Version::supports_avx512_vbmi()); 8290 match(Set dst (VectorRearrange src shuffle)); 8291 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8292 ins_encode %{ 8293 int vlen_enc = vector_length_encoding(this); 8294 __ vpermb($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8295 %} 8296 ins_pipe( pipe_slow ); 8297 %} 8298 8299 // LoadShuffle/Rearrange for Short 8300 8301 instruct loadShuffleS(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8302 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8303 Matcher::vector_length(n) <= 16 && !VM_Version::supports_avx512bw()); // NB! aligned with rearrangeS 8304 match(Set dst (VectorLoadShuffle src)); 8305 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8306 format %{ "vector_load_shuffle $dst, $src\t! 
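  // Worked example for the index expansion below (hedged on the exact layout of
  // vector_short_shufflemask()): a short shuffle index of 3 is zero-extended, doubled to 6,
  // duplicated into both bytes of the word (0x0606) and then biased per byte, so the word ends up
  // holding the byte indices {6, 7} that pshufb needs to move the whole 16-bit element.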
using $vtmp and $scratch as TEMP" %} 8307 ins_encode %{ 8308 // Create a byte shuffle mask from short shuffle mask 8309 // only byte shuffle instruction available on these platforms 8310 int vlen_in_bytes = Matcher::vector_length_in_bytes(this); 8311 if (UseAVX == 0) { 8312 assert(vlen_in_bytes <= 16, "required"); 8313 // Multiply each shuffle by two to get byte index 8314 __ pmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister); 8315 __ psllw($vtmp$$XMMRegister, 1); 8316 8317 // Duplicate to create 2 copies of byte index 8318 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8319 __ psllw($dst$$XMMRegister, 8); 8320 __ por($dst$$XMMRegister, $vtmp$$XMMRegister); 8321 8322 // Add one to get alternate byte index 8323 __ movdqu($vtmp$$XMMRegister, ExternalAddress(vector_short_shufflemask()), $scratch$$Register); 8324 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8325 } else { 8326 assert(UseAVX > 1 || vlen_in_bytes <= 16, "required"); 8327 int vlen_enc = vector_length_encoding(this); 8328 // Multiply each shuffle by two to get byte index 8329 __ vpmovzxbw($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8330 __ vpsllw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8331 8332 // Duplicate to create 2 copies of byte index 8333 __ vpsllw($dst$$XMMRegister, $vtmp$$XMMRegister, 8, vlen_enc); 8334 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8335 8336 // Add one to get alternate byte index 8337 __ vpaddb($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_shufflemask()), vlen_enc, $scratch$$Register); 8338 } 8339 %} 8340 ins_pipe( pipe_slow ); 8341 %} 8342 8343 instruct rearrangeS(vec dst, vec shuffle) %{ 8344 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8345 Matcher::vector_length(n) <= 8 && !VM_Version::supports_avx512bw()); 8346 match(Set dst (VectorRearrange dst shuffle)); 8347 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8348 ins_encode %{ 8349 assert(UseSSE >= 4, "required"); 8350 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8351 %} 8352 ins_pipe( pipe_slow ); 8353 %} 8354 8355 instruct rearrangeS_avx(legVec dst, legVec src, vec shuffle, legVec vtmp1, legVec vtmp2, rRegP scratch) %{ 8356 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8357 Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw()); 8358 match(Set dst (VectorRearrange src shuffle)); 8359 effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP scratch); 8360 format %{ "vector_rearrange $dst, $shuffle, $src\t! 
using $vtmp1, $vtmp2, $scratch as TEMP" %} 8361 ins_encode %{ 8362 assert(UseAVX >= 2, "required"); 8363 // Swap src into vtmp1 8364 __ vperm2i128($vtmp1$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 1); 8365 // Shuffle swapped src to get entries from other 128 bit lane 8366 __ vpshufb($vtmp1$$XMMRegister, $vtmp1$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8367 // Shuffle original src to get entries from self 128 bit lane 8368 __ vpshufb($dst$$XMMRegister, $src$$XMMRegister, $shuffle$$XMMRegister, Assembler::AVX_256bit); 8369 // Create a blend mask by setting high bits for entries coming from other lane in shuffle 8370 __ vpaddb($vtmp2$$XMMRegister, $shuffle$$XMMRegister, ExternalAddress(vector_byte_shufflemask()), Assembler::AVX_256bit, $scratch$$Register); 8371 // Perform the blend 8372 __ vpblendvb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister, Assembler::AVX_256bit); 8373 %} 8374 ins_pipe( pipe_slow ); 8375 %} 8376 8377 instruct loadShuffleS_evex(vec dst, vec src) %{ 8378 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8379 VM_Version::supports_avx512bw()); 8380 match(Set dst (VectorLoadShuffle src)); 8381 format %{ "vector_load_shuffle $dst, $src" %} 8382 ins_encode %{ 8383 int vlen_enc = vector_length_encoding(this); 8384 if (!VM_Version::supports_avx512vl()) { 8385 vlen_enc = Assembler::AVX_512bit; 8386 } 8387 __ vpmovzxbw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8388 %} 8389 ins_pipe( pipe_slow ); 8390 %} 8391 8392 instruct rearrangeS_evex(vec dst, vec src, vec shuffle) %{ 8393 predicate(Matcher::vector_element_basic_type(n) == T_SHORT && 8394 VM_Version::supports_avx512bw()); 8395 match(Set dst (VectorRearrange src shuffle)); 8396 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8397 ins_encode %{ 8398 int vlen_enc = vector_length_encoding(this); 8399 if (!VM_Version::supports_avx512vl()) { 8400 vlen_enc = Assembler::AVX_512bit; 8401 } 8402 __ vpermw($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8403 %} 8404 ins_pipe( pipe_slow ); 8405 %} 8406 8407 // LoadShuffle/Rearrange for Integer and Float 8408 8409 instruct loadShuffleI(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8410 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8411 Matcher::vector_length(n) == 4 && UseAVX < 2); 8412 match(Set dst (VectorLoadShuffle src)); 8413 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8414 format %{ "vector_load_shuffle $dst, $src\t! 
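  // Worked example for the expansion below (hedged on the exact layout of vector_int_shufflemask()):
  // an int shuffle index of 2 is quadrupled to 8, replicated into all four bytes of the dword and
  // then biased with {0,1,2,3}, giving the byte indices {8,9,10,11} that pshufb needs to move the
  // 32-bit element.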
using $vtmp and $scratch as TEMP" %} 8415 ins_encode %{ 8416 assert(UseSSE >= 4, "required"); 8417 8418 // Create a byte shuffle mask from int shuffle mask 8419 // only byte shuffle instruction available on these platforms 8420 8421 // Duplicate and multiply each shuffle by 4 8422 __ pmovzxbd($vtmp$$XMMRegister, $src$$XMMRegister); 8423 __ pshuflw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8424 __ pshufhw($vtmp$$XMMRegister, $vtmp$$XMMRegister, 0xA0); 8425 __ psllw($vtmp$$XMMRegister, 2); 8426 8427 // Duplicate again to create 4 copies of byte index 8428 __ movdqu($dst$$XMMRegister, $vtmp$$XMMRegister); 8429 __ psllw($dst$$XMMRegister, 8); 8430 __ por($vtmp$$XMMRegister, $dst$$XMMRegister); 8431 8432 // Add 3,2,1,0 to get alternate byte index 8433 __ movdqu($dst$$XMMRegister, ExternalAddress(vector_int_shufflemask()), $scratch$$Register); 8434 __ paddb($dst$$XMMRegister, $vtmp$$XMMRegister); 8435 %} 8436 ins_pipe( pipe_slow ); 8437 %} 8438 8439 instruct rearrangeI(vec dst, vec shuffle) %{ 8440 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8441 Matcher::vector_length(n) == 4 && UseAVX < 2); 8442 match(Set dst (VectorRearrange dst shuffle)); 8443 format %{ "vector_rearrange $dst, $shuffle, $dst" %} 8444 ins_encode %{ 8445 assert(UseSSE >= 4, "required"); 8446 __ pshufb($dst$$XMMRegister, $shuffle$$XMMRegister); 8447 %} 8448 ins_pipe( pipe_slow ); 8449 %} 8450 8451 instruct loadShuffleI_avx(vec dst, vec src) %{ 8452 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8453 UseAVX >= 2); 8454 match(Set dst (VectorLoadShuffle src)); 8455 format %{ "vector_load_shuffle $dst, $src" %} 8456 ins_encode %{ 8457 int vlen_enc = vector_length_encoding(this); 8458 __ vpmovzxbd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8459 %} 8460 ins_pipe( pipe_slow ); 8461 %} 8462 8463 instruct rearrangeI_avx(vec dst, vec src, vec shuffle) %{ 8464 predicate((Matcher::vector_element_basic_type(n) == T_INT || Matcher::vector_element_basic_type(n) == T_FLOAT) && 8465 UseAVX >= 2); 8466 match(Set dst (VectorRearrange src shuffle)); 8467 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8468 ins_encode %{ 8469 int vlen_enc = vector_length_encoding(this); 8470 if (vlen_enc == Assembler::AVX_128bit) { 8471 vlen_enc = Assembler::AVX_256bit; 8472 } 8473 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8474 %} 8475 ins_pipe( pipe_slow ); 8476 %} 8477 8478 // LoadShuffle/Rearrange for Long and Double 8479 8480 instruct loadShuffleL(vec dst, vec src, vec vtmp, rRegP scratch) %{ 8481 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8482 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8483 match(Set dst (VectorLoadShuffle src)); 8484 effect(TEMP dst, TEMP vtmp, TEMP scratch); 8485 format %{ "vector_load_shuffle $dst, $src\t! 
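  // Same idea as loadShuffleS/loadShuffleI, one level up (hedged on the layout of
  // vector_long_shufflemask()): a long shuffle index of 1 is doubled to 2, duplicated across the two
  // dwords of the element and biased with {0,1}, so the following vpermd sees the dword indices {2, 3}.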
using $vtmp and $scratch as TEMP" %} 8486 ins_encode %{ 8487 assert(UseAVX >= 2, "required"); 8488 8489 int vlen_enc = vector_length_encoding(this); 8490 // Create a double word shuffle mask from long shuffle mask 8491 // only double word shuffle instruction available on these platforms 8492 8493 // Multiply each shuffle by two to get double word index 8494 __ vpmovzxbq($vtmp$$XMMRegister, $src$$XMMRegister, vlen_enc); 8495 __ vpsllq($vtmp$$XMMRegister, $vtmp$$XMMRegister, 1, vlen_enc); 8496 8497 // Duplicate each double word shuffle 8498 __ vpsllq($dst$$XMMRegister, $vtmp$$XMMRegister, 32, vlen_enc); 8499 __ vpor($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc); 8500 8501 // Add one to get alternate double word index 8502 __ vpaddd($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_long_shufflemask()), vlen_enc, $scratch$$Register); 8503 %} 8504 ins_pipe( pipe_slow ); 8505 %} 8506 8507 instruct rearrangeL(vec dst, vec src, vec shuffle) %{ 8508 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8509 Matcher::vector_length(n) < 8 && !VM_Version::supports_avx512vl()); 8510 match(Set dst (VectorRearrange src shuffle)); 8511 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8512 ins_encode %{ 8513 assert(UseAVX >= 2, "required"); 8514 8515 int vlen_enc = vector_length_encoding(this); 8516 __ vpermd($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8517 %} 8518 ins_pipe( pipe_slow ); 8519 %} 8520 8521 instruct loadShuffleL_evex(vec dst, vec src) %{ 8522 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8523 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8524 match(Set dst (VectorLoadShuffle src)); 8525 format %{ "vector_load_shuffle $dst, $src" %} 8526 ins_encode %{ 8527 assert(UseAVX > 2, "required"); 8528 8529 int vlen_enc = vector_length_encoding(this); 8530 __ vpmovzxbq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc); 8531 %} 8532 ins_pipe( pipe_slow ); 8533 %} 8534 8535 instruct rearrangeL_evex(vec dst, vec src, vec shuffle) %{ 8536 predicate(is_double_word_type(Matcher::vector_element_basic_type(n)) && // T_LONG, T_DOUBLE 8537 (Matcher::vector_length(n) == 8 || VM_Version::supports_avx512vl())); 8538 match(Set dst (VectorRearrange src shuffle)); 8539 format %{ "vector_rearrange $dst, $shuffle, $src" %} 8540 ins_encode %{ 8541 assert(UseAVX > 2, "required"); 8542 8543 int vlen_enc = vector_length_encoding(this); 8544 if (vlen_enc == Assembler::AVX_128bit) { 8545 vlen_enc = Assembler::AVX_256bit; 8546 } 8547 __ vpermq($dst$$XMMRegister, $shuffle$$XMMRegister, $src$$XMMRegister, vlen_enc); 8548 %} 8549 ins_pipe( pipe_slow ); 8550 %} 8551 8552 // --------------------------------- FMA -------------------------------------- 8553 // a * b + c 8554 8555 instruct vfmaF_reg(vec a, vec b, vec c) %{ 8556 match(Set c (FmaVF c (Binary a b))); 8557 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8558 ins_cost(150); 8559 ins_encode %{ 8560 assert(UseFMA, "not enabled"); 8561 int vlen_enc = vector_length_encoding(this); 8562 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8563 %} 8564 ins_pipe( pipe_slow ); 8565 %} 8566 8567 instruct vfmaF_mem(vec a, memory b, vec c) %{ 8568 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8569 match(Set c (FmaVF c (Binary a (LoadVector b)))); 8570 format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %} 8571 ins_cost(150); 8572 ins_encode %{ 
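    // As in vfmaF_reg above, $c is both an input and the result ($c = $a * $b + $c), so the vfmaf
    // macro presumably emits the accumulating 231 form of FMA (vfmadd231ps here); the memory variant
    // simply folds the $b load into the instruction.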
8573 assert(UseFMA, "not enabled"); 8574 int vlen_enc = vector_length_encoding(this); 8575 __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8576 %} 8577 ins_pipe( pipe_slow ); 8578 %} 8579 8580 instruct vfmaD_reg(vec a, vec b, vec c) %{ 8581 match(Set c (FmaVD c (Binary a b))); 8582 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8583 ins_cost(150); 8584 ins_encode %{ 8585 assert(UseFMA, "not enabled"); 8586 int vlen_enc = vector_length_encoding(this); 8587 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vlen_enc); 8588 %} 8589 ins_pipe( pipe_slow ); 8590 %} 8591 8592 instruct vfmaD_mem(vec a, memory b, vec c) %{ 8593 predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8); 8594 match(Set c (FmaVD c (Binary a (LoadVector b)))); 8595 format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %} 8596 ins_cost(150); 8597 ins_encode %{ 8598 assert(UseFMA, "not enabled"); 8599 int vlen_enc = vector_length_encoding(this); 8600 __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vlen_enc); 8601 %} 8602 ins_pipe( pipe_slow ); 8603 %} 8604 8605 // --------------------------------- Vector Multiply Add -------------------------------------- 8606 8607 instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{ 8608 predicate(UseAVX == 0); 8609 match(Set dst (MulAddVS2VI dst src1)); 8610 format %{ "pmaddwd $dst,$src1\t! muladd packedStoI" %} 8611 ins_encode %{ 8612 __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister); 8613 %} 8614 ins_pipe( pipe_slow ); 8615 %} 8616 8617 instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{ 8618 predicate(UseAVX > 0); 8619 match(Set dst (MulAddVS2VI src1 src2)); 8620 format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %} 8621 ins_encode %{ 8622 int vlen_enc = vector_length_encoding(this); 8623 __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8624 %} 8625 ins_pipe( pipe_slow ); 8626 %} 8627 8628 // --------------------------------- Vector Multiply Add Add ---------------------------------- 8629 8630 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{ 8631 predicate(VM_Version::supports_avx512_vnni()); 8632 match(Set dst (AddVI (MulAddVS2VI src1 src2) dst)); 8633 format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %} 8634 ins_encode %{ 8635 assert(UseAVX > 2, "required"); 8636 int vlen_enc = vector_length_encoding(this); 8637 __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc); 8638 %} 8639 ins_pipe( pipe_slow ); 8640 ins_cost(10); 8641 %} 8642 8643 // --------------------------------- PopCount -------------------------------------- 8644 8645 instruct vpopcountI_popcntd(vec dst, vec src) %{ 8646 predicate(VM_Version::supports_avx512_vpopcntdq()); 8647 match(Set dst (PopCountVI src)); 8648 format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %} 8649 ins_encode %{ 8650 assert(UsePopCountInstruction, "not enabled"); 8651 int vlen_enc = vector_length_encoding(this); 8652 __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc); 8653 %} 8654 ins_pipe( pipe_slow ); 8655 %} 8656 8657 instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{ 8658 predicate(!VM_Version::supports_avx512_vpopcntdq()); 8659 match(Set dst (PopCountVI src)); 8660 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc); 8661 format %{ "vector_popcount_int $dst, $src\t! 
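  // A hedged note on the non-VPOPCNTDQ fallback below: vector_popcount_int/long presumably use the
  // classic pshufb nibble-table popcount (a 16-entry table of bit counts applied to the low and high
  // nibbles, then summed per lane), which is why three vector temps and a GPR temp are required,
  // whereas the AVX512_VPOPCNTDQ path above is a single instruction.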
using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8662 ins_encode %{ 8663 assert(UsePopCountInstruction, "not enabled"); 8664 int vlen_enc = vector_length_encoding(this); 8665 __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 8666 $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8667 %} 8668 ins_pipe( pipe_slow ); 8669 %} 8670 8671 instruct vpopcountL_popcntd(vec dst, vec src) %{ 8672 predicate(VM_Version::supports_avx512_vpopcntdq()); 8673 match(Set dst (PopCountVL src)); 8674 format %{ "vector_popcount_long $dst, $src\t! vector popcount packedL" %} 8675 ins_encode %{ 8676 assert(UsePopCountInstruction, "not enabled"); 8677 int vlen_enc = vector_length_encoding(this, $src); 8678 __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc); 8679 %} 8680 ins_pipe( pipe_slow ); 8681 %} 8682 8683 instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{ 8684 predicate(!VM_Version::supports_avx512_vpopcntdq()); 8685 match(Set dst (PopCountVL src)); 8686 effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc); 8687 format %{ "vector_popcount_long $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %} 8688 ins_encode %{ 8689 assert(UsePopCountInstruction, "not enabled"); 8690 int vlen_enc = vector_length_encoding(this, $src); 8691 __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, 8692 $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc); 8693 %} 8694 ins_pipe( pipe_slow ); 8695 %} 8696 8697 // --------------------------------- Bitwise Ternary Logic ---------------------------------- 8698 8699 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{ 8700 match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func))); 8701 effect(TEMP dst); 8702 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8703 ins_encode %{ 8704 int vector_len = vector_length_encoding(this); 8705 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len); 8706 %} 8707 ins_pipe( pipe_slow ); 8708 %} 8709 8710 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{ 8711 predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8); 8712 match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func))); 8713 effect(TEMP dst); 8714 format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %} 8715 ins_encode %{ 8716 int vector_len = vector_length_encoding(this); 8717 __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len); 8718 %} 8719 ins_pipe( pipe_slow ); 8720 %} 8721 8722 // --------------------------------- Rotation Operations ---------------------------------- 8723 instruct vprotate_immI8(vec dst, vec src, immI8 shift) %{ 8724 match(Set dst (RotateLeftV src shift)); 8725 match(Set dst (RotateRightV src shift)); 8726 format %{ "vprotate_imm8 $dst,$src,$shift\t! 
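  // A note on the MacroLogicV rules above: the $func immediate of vpternlogd is a three-input truth
  // table packed into one byte; result bit = func[(dst_bit << 2) | (src2_bit << 1) | src3_bit].
  // For example, 0x96 (0b1001_0110) is set exactly for the odd-parity index combinations, i.e. a
  // three-input XOR.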
vector rotate" %} 8727 ins_encode %{ 8728 int opcode = this->ideal_Opcode(); 8729 int vector_len = vector_length_encoding(this); 8730 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8731 __ vprotate_imm(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$constant, vector_len); 8732 %} 8733 ins_pipe( pipe_slow ); 8734 %} 8735 8736 instruct vprorate(vec dst, vec src, vec shift) %{ 8737 match(Set dst (RotateLeftV src shift)); 8738 match(Set dst (RotateRightV src shift)); 8739 format %{ "vprotate $dst,$src,$shift\t! vector rotate" %} 8740 ins_encode %{ 8741 int opcode = this->ideal_Opcode(); 8742 int vector_len = vector_length_encoding(this); 8743 BasicType etype = this->bottom_type()->is_vect()->element_basic_type(); 8744 __ vprotate_var(opcode, etype, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len); 8745 %} 8746 ins_pipe( pipe_slow ); 8747 %} 8748 8749 #ifdef _LP64 8750 // ---------------------------------- Masked Operations ------------------------------------ 8751 8752 instruct vmask_cmp_node(rRegI dst, vec src1, vec src2, kReg mask, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{ 8753 match(Set dst (VectorCmpMasked src1 (Binary src2 mask))); 8754 effect(TEMP_DEF dst, TEMP ktmp1, TEMP ktmp2, KILL cr); 8755 format %{ "vector_mask_cmp $src1, $src2, $mask \t! vector mask comparison" %} 8756 ins_encode %{ 8757 assert(vector_length_encoding(this, $src1) == vector_length_encoding(this, $src2), "mismatch"); 8758 assert(Matcher::vector_element_basic_type(this, $src1) == Matcher::vector_element_basic_type(this, $src2), "mismatch"); 8759 8760 Label DONE; 8761 int vlen_enc = vector_length_encoding(this, $src1); 8762 BasicType elem_bt = Matcher::vector_element_basic_type(this, $src1); 8763 8764 __ knotql($ktmp2$$KRegister, $mask$$KRegister); 8765 __ mov64($dst$$Register, -1L); 8766 __ evpcmp(elem_bt, $ktmp1$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, Assembler::eq, vlen_enc); 8767 __ kortestql($ktmp2$$KRegister, $ktmp1$$KRegister); 8768 __ jccb(Assembler::carrySet, DONE); 8769 __ kmovql($dst$$Register, $ktmp1$$KRegister); 8770 __ notq($dst$$Register); 8771 __ tzcntq($dst$$Register, $dst$$Register); 8772 __ bind(DONE); 8773 %} 8774 ins_pipe( pipe_slow ); 8775 %} 8776 8777 8778 instruct vmasked_load64(vec dst, memory mem, kReg mask) %{ 8779 match(Set dst (LoadVectorMasked mem mask)); 8780 format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %} 8781 ins_encode %{ 8782 BasicType elmType = this->bottom_type()->is_vect()->element_basic_type(); 8783 int vector_len = vector_length_encoding(this); 8784 __ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len); 8785 %} 8786 ins_pipe( pipe_slow ); 8787 %} 8788 8789 instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{ 8790 match(Set dst (VectorMaskGen len)); 8791 effect(TEMP temp); 8792 format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %} 8793 ins_encode %{ 8794 __ genmask($dst$$KRegister, $len$$Register, $temp$$Register); 8795 %} 8796 ins_pipe( pipe_slow ); 8797 %} 8798 8799 instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{ 8800 match(Set dst (VectorMaskGen len)); 8801 format %{ "vector_mask_gen $len \t! 
vector mask generator" %} 8802 effect(TEMP temp); 8803 ins_encode %{ 8804 __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant))); 8805 __ kmovql($dst$$KRegister, $temp$$Register); 8806 %} 8807 ins_pipe( pipe_slow ); 8808 %} 8809 8810 instruct vmasked_store64(memory mem, vec src, kReg mask) %{ 8811 match(Set mem (StoreVectorMasked mem (Binary src mask))); 8812 format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %} 8813 ins_encode %{ 8814 const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src))); 8815 BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type(); 8816 int vector_len = vector_length_encoding(src_node); 8817 __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len); 8818 %} 8819 ins_pipe( pipe_slow ); 8820 %} 8821 8822 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{ 8823 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8824 match(Set dst (VectorMaskToLong mask)); 8825 effect(TEMP dst, KILL cr); 8826 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %} 8827 ins_encode %{ 8828 int opcode = this->ideal_Opcode(); 8829 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8830 int mask_len = Matcher::vector_length(this, $mask); 8831 int mask_size = mask_len * type2aelembytes(mbt); 8832 int vlen_enc = vector_length_encoding(this, $mask); 8833 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8834 $dst$$Register, mask_len, mask_size, vlen_enc); 8835 %} 8836 ins_pipe( pipe_slow ); 8837 %} 8838 8839 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{ 8840 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8841 match(Set dst (VectorMaskToLong mask)); 8842 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %} 8843 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 8844 ins_encode %{ 8845 int opcode = this->ideal_Opcode(); 8846 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8847 int mask_len = Matcher::vector_length(this, $mask); 8848 int vlen_enc = vector_length_encoding(this, $mask); 8849 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8850 $dst$$Register, mask_len, mbt, vlen_enc); 8851 %} 8852 ins_pipe( pipe_slow ); 8853 %} 8854 8855 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{ 8856 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8857 match(Set dst (VectorMaskToLong (VectorStoreMask mask size))); 8858 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %} 8859 effect(TEMP_DEF dst, TEMP xtmp, KILL cr); 8860 ins_encode %{ 8861 int opcode = this->ideal_Opcode(); 8862 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8863 int mask_len = Matcher::vector_length(this, $mask); 8864 int vlen_enc = vector_length_encoding(this, $mask); 8865 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8866 $dst$$Register, mask_len, mbt, vlen_enc); 8867 %} 8868 ins_pipe( pipe_slow ); 8869 %} 8870 8871 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 8872 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8873 match(Set dst (VectorMaskTrueCount mask)); 8874 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 8875 format %{ "vector_truecount_evex $dst, $mask \t! 
using $tmp as TEMP" %} 8876 ins_encode %{ 8877 int opcode = this->ideal_Opcode(); 8878 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8879 int mask_len = Matcher::vector_length(this, $mask); 8880 int mask_size = mask_len * type2aelembytes(mbt); 8881 int vlen_enc = vector_length_encoding(this, $mask); 8882 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8883 $tmp$$Register, mask_len, mask_size, vlen_enc); 8884 %} 8885 ins_pipe( pipe_slow ); 8886 %} 8887 8888 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8889 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8890 match(Set dst (VectorMaskTrueCount mask)); 8891 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8892 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8893 ins_encode %{ 8894 int opcode = this->ideal_Opcode(); 8895 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8896 int mask_len = Matcher::vector_length(this, $mask); 8897 int vlen_enc = vector_length_encoding(this, $mask); 8898 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8899 $tmp$$Register, mask_len, mbt, vlen_enc); 8900 %} 8901 ins_pipe( pipe_slow ); 8902 %} 8903 8904 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8905 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8906 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size))); 8907 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8908 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8909 ins_encode %{ 8910 int opcode = this->ideal_Opcode(); 8911 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8912 int mask_len = Matcher::vector_length(this, $mask); 8913 int vlen_enc = vector_length_encoding(this, $mask); 8914 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8915 $tmp$$Register, mask_len, mbt, vlen_enc); 8916 %} 8917 ins_pipe( pipe_slow ); 8918 %} 8919 8920 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{ 8921 predicate(n->in(1)->bottom_type()->isa_vectmask()); 8922 match(Set dst (VectorMaskFirstTrue mask)); 8923 match(Set dst (VectorMaskLastTrue mask)); 8924 effect(TEMP_DEF dst, TEMP tmp, KILL cr); 8925 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %} 8926 ins_encode %{ 8927 int opcode = this->ideal_Opcode(); 8928 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8929 int mask_len = Matcher::vector_length(this, $mask); 8930 int mask_size = mask_len * type2aelembytes(mbt); 8931 int vlen_enc = vector_length_encoding(this, $mask); 8932 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, 8933 $tmp$$Register, mask_len, mask_size, vlen_enc); 8934 %} 8935 ins_pipe( pipe_slow ); 8936 %} 8937 8938 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8939 predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL); 8940 match(Set dst (VectorMaskFirstTrue mask)); 8941 match(Set dst (VectorMaskLastTrue mask)); 8942 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8943 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! 
using $tmp, $xtmp as TEMP" %} 8944 ins_encode %{ 8945 int opcode = this->ideal_Opcode(); 8946 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8947 int mask_len = Matcher::vector_length(this, $mask); 8948 int vlen_enc = vector_length_encoding(this, $mask); 8949 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8950 $tmp$$Register, mask_len, mbt, vlen_enc); 8951 %} 8952 ins_pipe( pipe_slow ); 8953 %} 8954 8955 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{ 8956 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL); 8957 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size))); 8958 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size))); 8959 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr); 8960 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %} 8961 ins_encode %{ 8962 int opcode = this->ideal_Opcode(); 8963 BasicType mbt = Matcher::vector_element_basic_type(this, $mask); 8964 int mask_len = Matcher::vector_length(this, $mask); 8965 int vlen_enc = vector_length_encoding(this, $mask); 8966 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister, 8967 $tmp$$Register, mask_len, mbt, vlen_enc); 8968 %} 8969 ins_pipe( pipe_slow ); 8970 %} 8971 #endif // _LP64 8972 8973 // ---------------------------------- Vector Masked Operations ------------------------------------ 8974 8975 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{ 8976 match(Set dst (AddVB (Binary dst src2) mask)); 8977 match(Set dst (AddVS (Binary dst src2) mask)); 8978 match(Set dst (AddVI (Binary dst src2) mask)); 8979 match(Set dst (AddVL (Binary dst src2) mask)); 8980 match(Set dst (AddVF (Binary dst src2) mask)); 8981 match(Set dst (AddVD (Binary dst src2) mask)); 8982 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 8983 ins_encode %{ 8984 int vlen_enc = vector_length_encoding(this); 8985 BasicType bt = Matcher::vector_element_basic_type(this); 8986 int opc = this->ideal_Opcode(); 8987 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 8988 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 8989 %} 8990 ins_pipe( pipe_slow ); 8991 %} 8992 8993 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{ 8994 match(Set dst (AddVB (Binary dst (LoadVector src2)) mask)); 8995 match(Set dst (AddVS (Binary dst (LoadVector src2)) mask)); 8996 match(Set dst (AddVI (Binary dst (LoadVector src2)) mask)); 8997 match(Set dst (AddVL (Binary dst (LoadVector src2)) mask)); 8998 match(Set dst (AddVF (Binary dst (LoadVector src2)) mask)); 8999 match(Set dst (AddVD (Binary dst (LoadVector src2)) mask)); 9000 format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %} 9001 ins_encode %{ 9002 int vlen_enc = vector_length_encoding(this); 9003 BasicType bt = Matcher::vector_element_basic_type(this); 9004 int opc = this->ideal_Opcode(); 9005 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9006 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9007 %} 9008 ins_pipe( pipe_slow ); 9009 %} 9010 9011 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{ 9012 match(Set dst (XorV (Binary dst src2) mask)); 9013 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! 
xor masked operation" %} 9014 ins_encode %{ 9015 int vlen_enc = vector_length_encoding(this); 9016 BasicType bt = Matcher::vector_element_basic_type(this); 9017 int opc = this->ideal_Opcode(); 9018 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9019 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9020 %} 9021 ins_pipe( pipe_slow ); 9022 %} 9023 9024 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{ 9025 match(Set dst (XorV (Binary dst (LoadVector src2)) mask)); 9026 format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %} 9027 ins_encode %{ 9028 int vlen_enc = vector_length_encoding(this); 9029 BasicType bt = Matcher::vector_element_basic_type(this); 9030 int opc = this->ideal_Opcode(); 9031 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9032 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9033 %} 9034 ins_pipe( pipe_slow ); 9035 %} 9036 9037 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{ 9038 match(Set dst (OrV (Binary dst src2) mask)); 9039 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9040 ins_encode %{ 9041 int vlen_enc = vector_length_encoding(this); 9042 BasicType bt = Matcher::vector_element_basic_type(this); 9043 int opc = this->ideal_Opcode(); 9044 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9045 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9046 %} 9047 ins_pipe( pipe_slow ); 9048 %} 9049 9050 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{ 9051 match(Set dst (OrV (Binary dst (LoadVector src2)) mask)); 9052 format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %} 9053 ins_encode %{ 9054 int vlen_enc = vector_length_encoding(this); 9055 BasicType bt = Matcher::vector_element_basic_type(this); 9056 int opc = this->ideal_Opcode(); 9057 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9058 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9059 %} 9060 ins_pipe( pipe_slow ); 9061 %} 9062 9063 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{ 9064 match(Set dst (AndV (Binary dst src2) mask)); 9065 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9066 ins_encode %{ 9067 int vlen_enc = vector_length_encoding(this); 9068 BasicType bt = Matcher::vector_element_basic_type(this); 9069 int opc = this->ideal_Opcode(); 9070 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9071 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9072 %} 9073 ins_pipe( pipe_slow ); 9074 %} 9075 9076 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{ 9077 match(Set dst (AndV (Binary dst (LoadVector src2)) mask)); 9078 format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %} 9079 ins_encode %{ 9080 int vlen_enc = vector_length_encoding(this); 9081 BasicType bt = Matcher::vector_element_basic_type(this); 9082 int opc = this->ideal_Opcode(); 9083 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9084 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9085 %} 9086 ins_pipe( pipe_slow ); 9087 %} 9088 9089 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{ 9090 match(Set dst (SubVB (Binary dst src2) mask)); 9091 match(Set dst (SubVS (Binary dst src2) mask)); 9092 match(Set dst (SubVI (Binary dst src2) mask)); 9093 match(Set dst (SubVL (Binary dst src2) mask)); 9094 match(Set dst (SubVF (Binary dst src2) mask)); 9095 match(Set dst (SubVD (Binary dst src2) mask)); 9096 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! 
sub masked operation" %} 9097 ins_encode %{ 9098 int vlen_enc = vector_length_encoding(this); 9099 BasicType bt = Matcher::vector_element_basic_type(this); 9100 int opc = this->ideal_Opcode(); 9101 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9102 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9103 %} 9104 ins_pipe( pipe_slow ); 9105 %} 9106 9107 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{ 9108 match(Set dst (SubVB (Binary dst (LoadVector src2)) mask)); 9109 match(Set dst (SubVS (Binary dst (LoadVector src2)) mask)); 9110 match(Set dst (SubVI (Binary dst (LoadVector src2)) mask)); 9111 match(Set dst (SubVL (Binary dst (LoadVector src2)) mask)); 9112 match(Set dst (SubVF (Binary dst (LoadVector src2)) mask)); 9113 match(Set dst (SubVD (Binary dst (LoadVector src2)) mask)); 9114 format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %} 9115 ins_encode %{ 9116 int vlen_enc = vector_length_encoding(this); 9117 BasicType bt = Matcher::vector_element_basic_type(this); 9118 int opc = this->ideal_Opcode(); 9119 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9120 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9121 %} 9122 ins_pipe( pipe_slow ); 9123 %} 9124 9125 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{ 9126 match(Set dst (MulVS (Binary dst src2) mask)); 9127 match(Set dst (MulVI (Binary dst src2) mask)); 9128 match(Set dst (MulVL (Binary dst src2) mask)); 9129 match(Set dst (MulVF (Binary dst src2) mask)); 9130 match(Set dst (MulVD (Binary dst src2) mask)); 9131 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9132 ins_encode %{ 9133 int vlen_enc = vector_length_encoding(this); 9134 BasicType bt = Matcher::vector_element_basic_type(this); 9135 int opc = this->ideal_Opcode(); 9136 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9137 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9138 %} 9139 ins_pipe( pipe_slow ); 9140 %} 9141 9142 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{ 9143 match(Set dst (MulVS (Binary dst (LoadVector src2)) mask)); 9144 match(Set dst (MulVI (Binary dst (LoadVector src2)) mask)); 9145 match(Set dst (MulVL (Binary dst (LoadVector src2)) mask)); 9146 match(Set dst (MulVF (Binary dst (LoadVector src2)) mask)); 9147 match(Set dst (MulVD (Binary dst (LoadVector src2)) mask)); 9148 format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %} 9149 ins_encode %{ 9150 int vlen_enc = vector_length_encoding(this); 9151 BasicType bt = Matcher::vector_element_basic_type(this); 9152 int opc = this->ideal_Opcode(); 9153 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9154 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9155 %} 9156 ins_pipe( pipe_slow ); 9157 %} 9158 9159 instruct vsqrt_reg_masked(vec dst, kReg mask) %{ 9160 match(Set dst (SqrtVF dst mask)); 9161 match(Set dst (SqrtVD dst mask)); 9162 ins_cost(100); 9163 format %{ "vpsqrt_masked $dst, $mask\t! 
sqrt masked operation" %} 9164 ins_encode %{ 9165 int vlen_enc = vector_length_encoding(this); 9166 BasicType bt = Matcher::vector_element_basic_type(this); 9167 int opc = this->ideal_Opcode(); 9168 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9169 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9170 %} 9171 ins_pipe( pipe_slow ); 9172 %} 9173 9174 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{ 9175 match(Set dst (DivVF (Binary dst src2) mask)); 9176 match(Set dst (DivVD (Binary dst src2) mask)); 9177 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9178 ins_encode %{ 9179 int vlen_enc = vector_length_encoding(this); 9180 BasicType bt = Matcher::vector_element_basic_type(this); 9181 int opc = this->ideal_Opcode(); 9182 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9183 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9184 %} 9185 ins_pipe( pipe_slow ); 9186 %} 9187 9188 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{ 9189 match(Set dst (DivVF (Binary dst (LoadVector src2)) mask)); 9190 match(Set dst (DivVD (Binary dst (LoadVector src2)) mask)); 9191 format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %} 9192 ins_encode %{ 9193 int vlen_enc = vector_length_encoding(this); 9194 BasicType bt = Matcher::vector_element_basic_type(this); 9195 int opc = this->ideal_Opcode(); 9196 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9197 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9198 %} 9199 ins_pipe( pipe_slow ); 9200 %} 9201 9202 9203 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9204 match(Set dst (RotateLeftV (Binary dst shift) mask)); 9205 match(Set dst (RotateRightV (Binary dst shift) mask)); 9206 format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %} 9207 ins_encode %{ 9208 int vlen_enc = vector_length_encoding(this); 9209 BasicType bt = Matcher::vector_element_basic_type(this); 9210 int opc = this->ideal_Opcode(); 9211 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9212 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9213 %} 9214 ins_pipe( pipe_slow ); 9215 %} 9216 9217 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{ 9218 match(Set dst (RotateLeftV (Binary dst src2) mask)); 9219 match(Set dst (RotateRightV (Binary dst src2) mask)); 9220 format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %} 9221 ins_encode %{ 9222 int vlen_enc = vector_length_encoding(this); 9223 BasicType bt = Matcher::vector_element_basic_type(this); 9224 int opc = this->ideal_Opcode(); 9225 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9226 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9227 %} 9228 ins_pipe( pipe_slow ); 9229 %} 9230 9231 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9232 match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask)); 9233 match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask)); 9234 match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask)); 9235 format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! 
lshift masked operation" %} 9236 ins_encode %{ 9237 int vlen_enc = vector_length_encoding(this); 9238 BasicType bt = Matcher::vector_element_basic_type(this); 9239 int opc = this->ideal_Opcode(); 9240 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9241 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9242 %} 9243 ins_pipe( pipe_slow ); 9244 %} 9245 9246 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9247 predicate(!n->as_ShiftV()->is_var_shift()); 9248 match(Set dst (LShiftVS (Binary dst src2) mask)); 9249 match(Set dst (LShiftVI (Binary dst src2) mask)); 9250 match(Set dst (LShiftVL (Binary dst src2) mask)); 9251 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9252 ins_encode %{ 9253 int vlen_enc = vector_length_encoding(this); 9254 BasicType bt = Matcher::vector_element_basic_type(this); 9255 int opc = this->ideal_Opcode(); 9256 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9257 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9258 %} 9259 ins_pipe( pipe_slow ); 9260 %} 9261 9262 instruct vlshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9263 predicate(n->as_ShiftV()->is_var_shift()); 9264 match(Set dst (LShiftVS (Binary dst src2) mask)); 9265 match(Set dst (LShiftVI (Binary dst src2) mask)); 9266 match(Set dst (LShiftVL (Binary dst src2) mask)); 9267 format %{ "vplshiftv_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9268 ins_encode %{ 9269 int vlen_enc = vector_length_encoding(this); 9270 BasicType bt = Matcher::vector_element_basic_type(this); 9271 int opc = this->ideal_Opcode(); 9272 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9273 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9274 %} 9275 ins_pipe( pipe_slow ); 9276 %} 9277 9278 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9279 match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask)); 9280 match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask)); 9281 match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask)); 9282 format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %} 9283 ins_encode %{ 9284 int vlen_enc = vector_length_encoding(this); 9285 BasicType bt = Matcher::vector_element_basic_type(this); 9286 int opc = this->ideal_Opcode(); 9287 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9288 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9289 %} 9290 ins_pipe( pipe_slow ); 9291 %} 9292 9293 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9294 match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask)); 9295 match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask)); 9296 match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask)); 9297 format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! 
rshift masked operation" %} 9298 ins_encode %{ 9299 int vlen_enc = vector_length_encoding(this); 9300 BasicType bt = Matcher::vector_element_basic_type(this); 9301 int opc = this->ideal_Opcode(); 9302 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9303 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9304 %} 9305 ins_pipe( pipe_slow ); 9306 %} 9307 9308 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9309 predicate(!n->as_ShiftV()->is_var_shift()); 9310 match(Set dst (RShiftVS (Binary dst src2) mask)); 9311 match(Set dst (RShiftVI (Binary dst src2) mask)); 9312 match(Set dst (RShiftVL (Binary dst src2) mask)); 9313 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9314 ins_encode %{ 9315 int vlen_enc = vector_length_encoding(this); 9316 BasicType bt = Matcher::vector_element_basic_type(this); 9317 int opc = this->ideal_Opcode(); 9318 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9319 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9320 %} 9321 ins_pipe( pipe_slow ); 9322 %} 9323 9324 instruct vrshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9325 predicate(n->as_ShiftV()->is_var_shift()); 9326 match(Set dst (RShiftVS (Binary dst src2) mask)); 9327 match(Set dst (RShiftVI (Binary dst src2) mask)); 9328 match(Set dst (RShiftVL (Binary dst src2) mask)); 9329 format %{ "vprshiftv_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9330 ins_encode %{ 9331 int vlen_enc = vector_length_encoding(this); 9332 BasicType bt = Matcher::vector_element_basic_type(this); 9333 int opc = this->ideal_Opcode(); 9334 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9335 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9336 %} 9337 ins_pipe( pipe_slow ); 9338 %} 9339 9340 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9341 match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask)); 9342 match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask)); 9343 match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask)); 9344 format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %} 9345 ins_encode %{ 9346 int vlen_enc = vector_length_encoding(this); 9347 BasicType bt = Matcher::vector_element_basic_type(this); 9348 int opc = this->ideal_Opcode(); 9349 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9350 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9351 %} 9352 ins_pipe( pipe_slow ); 9353 %} 9354 9355 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{ 9356 match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask)); 9357 match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask)); 9358 match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask)); 9359 format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! 
urshift masked operation" %} 9360 ins_encode %{ 9361 int vlen_enc = vector_length_encoding(this); 9362 BasicType bt = Matcher::vector_element_basic_type(this); 9363 int opc = this->ideal_Opcode(); 9364 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9365 $dst$$XMMRegister, $shift$$constant, true, vlen_enc); 9366 %} 9367 ins_pipe( pipe_slow ); 9368 %} 9369 9370 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{ 9371 predicate(!n->as_ShiftV()->is_var_shift()); 9372 match(Set dst (URShiftVS (Binary dst src2) mask)); 9373 match(Set dst (URShiftVI (Binary dst src2) mask)); 9374 match(Set dst (URShiftVL (Binary dst src2) mask)); 9375 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9376 ins_encode %{ 9377 int vlen_enc = vector_length_encoding(this); 9378 BasicType bt = Matcher::vector_element_basic_type(this); 9379 int opc = this->ideal_Opcode(); 9380 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9381 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, false); 9382 %} 9383 ins_pipe( pipe_slow ); 9384 %} 9385 9386 instruct vurshiftv_reg_masked(vec dst, vec src2, kReg mask) %{ 9387 predicate(n->as_ShiftV()->is_var_shift()); 9388 match(Set dst (URShiftVS (Binary dst src2) mask)); 9389 match(Set dst (URShiftVI (Binary dst src2) mask)); 9390 match(Set dst (URShiftVL (Binary dst src2) mask)); 9391 format %{ "vpurshiftv_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9392 ins_encode %{ 9393 int vlen_enc = vector_length_encoding(this); 9394 BasicType bt = Matcher::vector_element_basic_type(this); 9395 int opc = this->ideal_Opcode(); 9396 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9397 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, true); 9398 %} 9399 ins_pipe( pipe_slow ); 9400 %} 9401 9402 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{ 9403 match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask)); 9404 match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask)); 9405 match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask)); 9406 format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %} 9407 ins_encode %{ 9408 int vlen_enc = vector_length_encoding(this); 9409 BasicType bt = Matcher::vector_element_basic_type(this); 9410 int opc = this->ideal_Opcode(); 9411 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9412 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9413 %} 9414 ins_pipe( pipe_slow ); 9415 %} 9416 9417 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{ 9418 match(Set dst (MaxV (Binary dst src2) mask)); 9419 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %} 9420 ins_encode %{ 9421 int vlen_enc = vector_length_encoding(this); 9422 BasicType bt = Matcher::vector_element_basic_type(this); 9423 int opc = this->ideal_Opcode(); 9424 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9425 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9426 %} 9427 ins_pipe( pipe_slow ); 9428 %} 9429 9430 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{ 9431 match(Set dst (MaxV (Binary dst (LoadVector src2)) mask)); 9432 format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! 
max masked operation" %} 9433 ins_encode %{ 9434 int vlen_enc = vector_length_encoding(this); 9435 BasicType bt = Matcher::vector_element_basic_type(this); 9436 int opc = this->ideal_Opcode(); 9437 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9438 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9439 %} 9440 ins_pipe( pipe_slow ); 9441 %} 9442 9443 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{ 9444 match(Set dst (MinV (Binary dst src2) mask)); 9445 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9446 ins_encode %{ 9447 int vlen_enc = vector_length_encoding(this); 9448 BasicType bt = Matcher::vector_element_basic_type(this); 9449 int opc = this->ideal_Opcode(); 9450 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9451 $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc); 9452 %} 9453 ins_pipe( pipe_slow ); 9454 %} 9455 9456 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{ 9457 match(Set dst (MinV (Binary dst (LoadVector src2)) mask)); 9458 format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %} 9459 ins_encode %{ 9460 int vlen_enc = vector_length_encoding(this); 9461 BasicType bt = Matcher::vector_element_basic_type(this); 9462 int opc = this->ideal_Opcode(); 9463 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9464 $dst$$XMMRegister, $src2$$Address, true, vlen_enc); 9465 %} 9466 ins_pipe( pipe_slow ); 9467 %} 9468 9469 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{ 9470 match(Set dst (VectorRearrange (Binary dst src2) mask)); 9471 format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %} 9472 ins_encode %{ 9473 int vlen_enc = vector_length_encoding(this); 9474 BasicType bt = Matcher::vector_element_basic_type(this); 9475 int opc = this->ideal_Opcode(); 9476 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9477 $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc); 9478 %} 9479 ins_pipe( pipe_slow ); 9480 %} 9481 9482 instruct vabs_masked(vec dst, kReg mask) %{ 9483 match(Set dst (AbsVB dst mask)); 9484 match(Set dst (AbsVS dst mask)); 9485 match(Set dst (AbsVI dst mask)); 9486 match(Set dst (AbsVL dst mask)); 9487 format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %} 9488 ins_cost(100); 9489 ins_encode %{ 9490 int vlen_enc = vector_length_encoding(this); 9491 BasicType bt = Matcher::vector_element_basic_type(this); 9492 int opc = this->ideal_Opcode(); 9493 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9494 $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc); 9495 %} 9496 ins_pipe( pipe_slow ); 9497 %} 9498 9499 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{ 9500 match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask))); 9501 match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask))); 9502 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! 
vfma masked operation" %} 9503 ins_encode %{ 9504 int vlen_enc = vector_length_encoding(this); 9505 BasicType bt = Matcher::vector_element_basic_type(this); 9506 int opc = this->ideal_Opcode(); 9507 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9508 $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc); 9509 %} 9510 ins_pipe( pipe_slow ); 9511 %} 9512 9513 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{ 9514 match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask))); 9515 match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask))); 9516 format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %} 9517 ins_encode %{ 9518 int vlen_enc = vector_length_encoding(this); 9519 BasicType bt = Matcher::vector_element_basic_type(this); 9520 int opc = this->ideal_Opcode(); 9521 __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister, 9522 $src2$$XMMRegister, $src3$$Address, true, vlen_enc); 9523 %} 9524 ins_pipe( pipe_slow ); 9525 %} 9526 9527 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{ 9528 match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask))); 9529 effect(TEMP scratch); 9530 format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! using $scratch as TEMP" %} 9531 ins_encode %{ 9532 assert(bottom_type()->isa_vectmask(), "TypeVectMask expected"); 9533 int vlen_enc = vector_length_encoding(this, $src1); 9534 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1); 9535 9536 // Per-lane comparison of src1 and src2 under the governing mask: integer element types select a signed or unsigned comparison predicate, floating-point types use the FP predicate encoding. 9537 switch (src1_elem_bt) { 9538 case T_BYTE: { 9539 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9540 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9541 __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9542 break; 9543 } 9544 case T_SHORT: { 9545 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9546 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9547 __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9548 break; 9549 } 9550 case T_INT: { 9551 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9552 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9553 __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9554 break; 9555 } 9556 case T_LONG: { 9557 bool is_unsigned = is_unsigned_booltest_pred($cond$$constant); 9558 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant); 9559 __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc); 9560 break; 9561 } 9562 case T_FLOAT: { 9563 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9564 __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9565 break; 9566 } 9567 case T_DOUBLE: { 9568 Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant); 9569 __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc); 9570 break; 9571 } 9572 default: assert(false, "%s", type2name(src1_elem_bt)); break; 9573 } 9574 %} 9575 ins_pipe( pipe_slow ); 9576 %} 9577 9578 instruct mask_all_evexI_LE32(kReg dst, rRegI src)
%{ 9579 predicate(Matcher::vector_length(n) <= 32); 9580 match(Set dst (MaskAll src)); 9581 format %{ "mask_all_evexI_LE32 $dst, $src \t" %} 9582 ins_encode %{ 9583 int mask_len = Matcher::vector_length(this); 9584 __ vector_maskall_operation($dst$$KRegister, $src$$Register, mask_len); 9585 %} 9586 ins_pipe( pipe_slow ); 9587 %} 9588 9589 #ifdef _LP64 9590 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{ 9591 predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq()); 9592 match(Set dst (XorVMask src (MaskAll cnt))); 9593 effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp); 9594 format %{ "mask_not_LT8 $dst, $src, $cnt \t!using $ktmp and $rtmp as TEMP" %} 9595 ins_encode %{ 9596 uint masklen = Matcher::vector_length(this); 9597 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register); 9598 %} 9599 ins_pipe( pipe_slow ); 9600 %} 9601 9602 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{ 9603 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) || 9604 (Matcher::vector_length(n) == 16) || 9605 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw())); 9606 match(Set dst (XorVMask src (MaskAll cnt))); 9607 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %} 9608 ins_encode %{ 9609 uint masklen = Matcher::vector_length(this); 9610 __ knot(masklen, $dst$$KRegister, $src$$KRegister); 9611 %} 9612 ins_pipe( pipe_slow ); 9613 %} 9614 9615 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{ 9616 predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8); 9617 match(Set dst (VectorLongToMask src)); 9618 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp); 9619 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %} 9620 ins_encode %{ 9621 int mask_len = Matcher::vector_length(this); 9622 int vec_enc = vector_length_encoding(mask_len); 9623 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 9624 $rtmp2$$Register, xnoreg, mask_len, vec_enc); 9625 %} 9626 ins_pipe( pipe_slow ); 9627 %} 9628 9629 9630 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{ 9631 predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8); 9632 match(Set dst (VectorLongToMask src)); 9633 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr); 9634 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %} 9635 ins_encode %{ 9636 int mask_len = Matcher::vector_length(this); 9637 assert(mask_len <= 32, "invalid mask length"); 9638 int vec_enc = vector_length_encoding(mask_len); 9639 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register, 9640 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc); 9641 %} 9642 ins_pipe( pipe_slow ); 9643 %} 9644 9645 instruct long_to_mask_evex(kReg dst, rRegL src) %{ 9646 predicate(n->bottom_type()->isa_vectmask()); 9647 match(Set dst (VectorLongToMask src)); 9648 format %{ "long_to_mask_evex $dst, $src\t!" %} 9649 ins_encode %{ 9650 __ kmov($dst$$KRegister, $src$$Register); 9651 %} 9652 ins_pipe( pipe_slow ); 9653 %} 9654 #endif 9655 9656 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{ 9657 match(Set dst (AndVMask src1 src2)); 9658 match(Set dst (OrVMask src1 src2)); 9659 match(Set dst (XorVMask src1 src2)); 9660 effect(TEMP kscratch); 9661 format %{ "mask_opers_evex $dst, $src1, $src2\t! 
using $kscratch as TEMP" %} 9662 ins_encode %{ 9663 const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1))); 9664 const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2))); 9665 assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), ""); 9666 uint masklen = Matcher::vector_length(this); 9667 masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen; 9668 __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister); 9669 %} 9670 ins_pipe( pipe_slow ); 9671 %} 9672 9673 instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{ 9674 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 9675 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 9676 ins_encode %{ 9677 int vlen_enc = vector_length_encoding(this); 9678 BasicType bt = Matcher::vector_element_basic_type(this); 9679 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 9680 $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc); 9681 %} 9682 ins_pipe( pipe_slow ); 9683 %} 9684 9685 instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{ 9686 match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask))))); 9687 format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %} 9688 ins_encode %{ 9689 int vlen_enc = vector_length_encoding(this); 9690 BasicType bt = Matcher::vector_element_basic_type(this); 9691 __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister, 9692 $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc); 9693 %} 9694 ins_pipe( pipe_slow ); 9695 %} 9696 9697 instruct castMM(kReg dst) 9698 %{ 9699 match(Set dst (CastVV dst)); 9700 9701 size(0); 9702 format %{ "# castVV of $dst" %} 9703 ins_encode(/* empty encoding */); 9704 ins_cost(0); 9705 ins_pipe(empty); 9706 %} 9707 9708 instruct castVV(vec dst) 9709 %{ 9710 match(Set dst (CastVV dst)); 9711 9712 size(0); 9713 format %{ "# castVV of $dst" %} 9714 ins_encode(/* empty encoding */); 9715 ins_cost(0); 9716 ins_pipe(empty); 9717 %} 9718 9719 instruct castVVLeg(legVec dst) 9720 %{ 9721 match(Set dst (CastVV dst)); 9722 9723 size(0); 9724 format %{ "# castVV of $dst" %} 9725 ins_encode(/* empty encoding */); 9726 ins_cost(0); 9727 ins_pipe(empty); 9728 %}
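
// --------------------------------- Note: ternary-logic immediate (illustrative) ----------------------------------
// Illustrative annotation, not a matched rule. The immU8 'func' operand of the vpternlog and
// vternlog_*_masked patterns above is consumed by VPTERNLOGD as a 3-input truth table: for each
// bit position the result bit is func[(dst_bit << 2) | (src2_bit << 1) | src3_bit], with the three
// inputs taken in the order (dst, src2, src3) as they are passed to the assembler call.
// For example, func = 0x96 computes dst ^ src2 ^ src3 (3-way parity), and func = 0xCA computes
// (dst & src2) | (~dst & src3), i.e. a bitwise select with dst acting as the condition.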
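// --------------------------------- Note: masked operation lowering (illustrative) ----------------------------------
// Illustrative annotation, not a matched rule. The *_masked patterns above match ideal nodes whose
// extra input is a predicate mask, e.g. (AddVI (Binary dst src2) mask), and pass them to the
// evmasked_op() helper in the C2 macro assembler, which emits the EVEX form of the instruction with
// the k-register as a writemask; the trailing 'true' argument appears to request merge masking, so
// lanes whose mask bit is clear keep the value already in dst. A plausible Java-level origin, assumed
// here purely for orientation (incubating Vector API, run with --add-modules jdk.incubator.vector),
// is a masked lanewise operation such as:
//
//   import jdk.incubator.vector.IntVector;
//   import jdk.incubator.vector.VectorMask;
//   import jdk.incubator.vector.VectorOperators;
//
//   class MaskedAddExample {
//     static IntVector maskedAdd(IntVector a, IntVector b, VectorMask<Integer> m) {
//       return a.lanewise(VectorOperators.ADD, b, m);  // lanes where m is unset keep a's value
//     }
//   }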