1 /* 2 * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2016, 2024 SAP SE. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "asm/macroAssembler.inline.hpp" 27 #include "code/debugInfoRec.hpp" 28 #include "code/vtableStubs.hpp" 29 #include "code/compiledIC.hpp" 30 #include "compiler/oopMap.hpp" 31 #include "gc/shared/barrierSetAssembler.hpp" 32 #include "gc/shared/gcLocker.hpp" 33 #include "interpreter/interpreter.hpp" 34 #include "interpreter/interp_masm.hpp" 35 #include "memory/resourceArea.hpp" 36 #include "nativeInst_s390.hpp" 37 #include "oops/klass.inline.hpp" 38 #include "prims/methodHandles.hpp" 39 #include "registerSaver_s390.hpp" 40 #include "runtime/jniHandles.hpp" 41 #include "runtime/safepointMechanism.hpp" 42 #include "runtime/sharedRuntime.hpp" 43 #include "runtime/signature.hpp" 44 #include "runtime/stubRoutines.hpp" 45 #include "runtime/timerTrace.hpp" 46 #include "runtime/vframeArray.hpp" 47 #include "utilities/align.hpp" 48 #include "utilities/macros.hpp" 49 #include "vmreg_s390.inline.hpp" 50 #ifdef COMPILER1 51 #include "c1/c1_Runtime1.hpp" 52 #endif 53 #ifdef COMPILER2 54 #include "opto/ad.hpp" 55 #include "opto/runtime.hpp" 56 #endif 57 58 #ifdef PRODUCT 59 #define __ masm-> 60 #else 61 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)-> 62 #endif 63 64 #define BLOCK_COMMENT(str) __ block_comment(str) 65 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":") 66 67 #define RegisterSaver_LiveIntReg(regname) \ 68 { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() } 69 70 #define RegisterSaver_LiveFloatReg(regname) \ 71 { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() } 72 73 // Registers which are not saved/restored, but still they have got a frame slot. 74 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2 75 #define RegisterSaver_ExcludedIntReg(regname) \ 76 { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() } 77 78 // Registers which are not saved/restored, but still they have got a frame slot. 79 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2. 
80 #define RegisterSaver_ExcludedFloatReg(regname) \ 81 { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() } 82 83 #define RegisterSaver_LiveVReg(regname) \ 84 { RegisterSaver::v_reg, regname->encoding(), regname->as_VMReg() } 85 86 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = { 87 // Live registers which get spilled to the stack. Register positions 88 // in this array correspond directly to the stack layout. 89 // 90 // live float registers: 91 // 92 RegisterSaver_LiveFloatReg(Z_F0 ), 93 // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) 94 RegisterSaver_LiveFloatReg(Z_F2 ), 95 RegisterSaver_LiveFloatReg(Z_F3 ), 96 RegisterSaver_LiveFloatReg(Z_F4 ), 97 RegisterSaver_LiveFloatReg(Z_F5 ), 98 RegisterSaver_LiveFloatReg(Z_F6 ), 99 RegisterSaver_LiveFloatReg(Z_F7 ), 100 RegisterSaver_LiveFloatReg(Z_F8 ), 101 RegisterSaver_LiveFloatReg(Z_F9 ), 102 RegisterSaver_LiveFloatReg(Z_F10), 103 RegisterSaver_LiveFloatReg(Z_F11), 104 RegisterSaver_LiveFloatReg(Z_F12), 105 RegisterSaver_LiveFloatReg(Z_F13), 106 RegisterSaver_LiveFloatReg(Z_F14), 107 RegisterSaver_LiveFloatReg(Z_F15), 108 // 109 // RegisterSaver_ExcludedIntReg(Z_R0), // scratch 110 // RegisterSaver_ExcludedIntReg(Z_R1), // scratch 111 RegisterSaver_LiveIntReg(Z_R2 ), 112 RegisterSaver_LiveIntReg(Z_R3 ), 113 RegisterSaver_LiveIntReg(Z_R4 ), 114 RegisterSaver_LiveIntReg(Z_R5 ), 115 RegisterSaver_LiveIntReg(Z_R6 ), 116 RegisterSaver_LiveIntReg(Z_R7 ), 117 RegisterSaver_LiveIntReg(Z_R8 ), 118 RegisterSaver_LiveIntReg(Z_R9 ), 119 RegisterSaver_LiveIntReg(Z_R10), 120 RegisterSaver_LiveIntReg(Z_R11), 121 RegisterSaver_LiveIntReg(Z_R12), 122 RegisterSaver_LiveIntReg(Z_R13), 123 // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) 124 // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer 125 }; 126 127 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = { 128 // Live registers which get spilled to the stack. Register positions 129 // in this array correspond directly to the stack layout. 130 // 131 // live float registers: All excluded, but still they get a stack slot to get same frame size. 132 // 133 RegisterSaver_ExcludedFloatReg(Z_F0 ), 134 // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) 135 RegisterSaver_ExcludedFloatReg(Z_F2 ), 136 RegisterSaver_ExcludedFloatReg(Z_F3 ), 137 RegisterSaver_ExcludedFloatReg(Z_F4 ), 138 RegisterSaver_ExcludedFloatReg(Z_F5 ), 139 RegisterSaver_ExcludedFloatReg(Z_F6 ), 140 RegisterSaver_ExcludedFloatReg(Z_F7 ), 141 RegisterSaver_ExcludedFloatReg(Z_F8 ), 142 RegisterSaver_ExcludedFloatReg(Z_F9 ), 143 RegisterSaver_ExcludedFloatReg(Z_F10), 144 RegisterSaver_ExcludedFloatReg(Z_F11), 145 RegisterSaver_ExcludedFloatReg(Z_F12), 146 RegisterSaver_ExcludedFloatReg(Z_F13), 147 RegisterSaver_ExcludedFloatReg(Z_F14), 148 RegisterSaver_ExcludedFloatReg(Z_F15), 149 // 150 // RegisterSaver_ExcludedIntReg(Z_R0), // scratch 151 // RegisterSaver_ExcludedIntReg(Z_R1), // scratch 152 RegisterSaver_LiveIntReg(Z_R2 ), 153 RegisterSaver_LiveIntReg(Z_R3 ), 154 RegisterSaver_LiveIntReg(Z_R4 ), 155 RegisterSaver_LiveIntReg(Z_R5 ), 156 RegisterSaver_LiveIntReg(Z_R6 ), 157 RegisterSaver_LiveIntReg(Z_R7 ), 158 RegisterSaver_LiveIntReg(Z_R8 ), 159 RegisterSaver_LiveIntReg(Z_R9 ), 160 RegisterSaver_LiveIntReg(Z_R10), 161 RegisterSaver_LiveIntReg(Z_R11), 162 RegisterSaver_LiveIntReg(Z_R12), 163 RegisterSaver_LiveIntReg(Z_R13), 164 // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) 
165 // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer 166 }; 167 168 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = { 169 // Live registers which get spilled to the stack. Register positions 170 // in this array correspond directly to the stack layout. 171 // 172 // live float registers: 173 // 174 RegisterSaver_LiveFloatReg(Z_F0 ), 175 // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) 176 RegisterSaver_LiveFloatReg(Z_F2 ), 177 RegisterSaver_LiveFloatReg(Z_F3 ), 178 RegisterSaver_LiveFloatReg(Z_F4 ), 179 RegisterSaver_LiveFloatReg(Z_F5 ), 180 RegisterSaver_LiveFloatReg(Z_F6 ), 181 RegisterSaver_LiveFloatReg(Z_F7 ), 182 RegisterSaver_LiveFloatReg(Z_F8 ), 183 RegisterSaver_LiveFloatReg(Z_F9 ), 184 RegisterSaver_LiveFloatReg(Z_F10), 185 RegisterSaver_LiveFloatReg(Z_F11), 186 RegisterSaver_LiveFloatReg(Z_F12), 187 RegisterSaver_LiveFloatReg(Z_F13), 188 RegisterSaver_LiveFloatReg(Z_F14), 189 RegisterSaver_LiveFloatReg(Z_F15), 190 // 191 // RegisterSaver_ExcludedIntReg(Z_R0), // scratch 192 // RegisterSaver_ExcludedIntReg(Z_R1), // scratch 193 RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2. 194 RegisterSaver_LiveIntReg(Z_R3 ), 195 RegisterSaver_LiveIntReg(Z_R4 ), 196 RegisterSaver_LiveIntReg(Z_R5 ), 197 RegisterSaver_LiveIntReg(Z_R6 ), 198 RegisterSaver_LiveIntReg(Z_R7 ), 199 RegisterSaver_LiveIntReg(Z_R8 ), 200 RegisterSaver_LiveIntReg(Z_R9 ), 201 RegisterSaver_LiveIntReg(Z_R10), 202 RegisterSaver_LiveIntReg(Z_R11), 203 RegisterSaver_LiveIntReg(Z_R12), 204 RegisterSaver_LiveIntReg(Z_R13), 205 // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) 206 // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer 207 }; 208 209 // Live argument registers which get spilled to the stack. 210 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = { 211 RegisterSaver_LiveFloatReg(Z_FARG1), 212 RegisterSaver_LiveFloatReg(Z_FARG2), 213 RegisterSaver_LiveFloatReg(Z_FARG3), 214 RegisterSaver_LiveFloatReg(Z_FARG4), 215 RegisterSaver_LiveIntReg(Z_ARG1), 216 RegisterSaver_LiveIntReg(Z_ARG2), 217 RegisterSaver_LiveIntReg(Z_ARG3), 218 RegisterSaver_LiveIntReg(Z_ARG4), 219 RegisterSaver_LiveIntReg(Z_ARG5) 220 }; 221 222 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = { 223 // Live registers which get spilled to the stack. Register positions 224 // in this array correspond directly to the stack layout. 
225 // 226 // live float registers: 227 // 228 RegisterSaver_LiveFloatReg(Z_F0 ), 229 // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1) 230 RegisterSaver_LiveFloatReg(Z_F2 ), 231 RegisterSaver_LiveFloatReg(Z_F3 ), 232 RegisterSaver_LiveFloatReg(Z_F4 ), 233 RegisterSaver_LiveFloatReg(Z_F5 ), 234 RegisterSaver_LiveFloatReg(Z_F6 ), 235 RegisterSaver_LiveFloatReg(Z_F7 ), 236 // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile 237 // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile 238 // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile 239 // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile 240 // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile 241 // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile 242 // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile 243 // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile 244 // 245 // RegisterSaver_ExcludedIntReg(Z_R0), // scratch 246 // RegisterSaver_ExcludedIntReg(Z_R1), // scratch 247 RegisterSaver_LiveIntReg(Z_R2 ), 248 RegisterSaver_LiveIntReg(Z_R3 ), 249 RegisterSaver_LiveIntReg(Z_R4 ), 250 RegisterSaver_LiveIntReg(Z_R5 ), 251 // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile 252 // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile 253 // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile 254 // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile 255 // RegisterSaver_LiveIntReg(Z_R10), // non-volatile 256 // RegisterSaver_LiveIntReg(Z_R11), // non-volatile 257 // RegisterSaver_LiveIntReg(Z_R12), // non-volatile 258 // RegisterSaver_LiveIntReg(Z_R13), // non-volatile 259 // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.) 260 // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer 261 }; 262 263 static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = { 264 // live vector registers (optional, only these are used by C2): 265 RegisterSaver_LiveVReg( Z_V16 ), 266 RegisterSaver_LiveVReg( Z_V17 ), 267 RegisterSaver_LiveVReg( Z_V18 ), 268 RegisterSaver_LiveVReg( Z_V19 ), 269 RegisterSaver_LiveVReg( Z_V20 ), 270 RegisterSaver_LiveVReg( Z_V21 ), 271 RegisterSaver_LiveVReg( Z_V22 ), 272 RegisterSaver_LiveVReg( Z_V23 ), 273 RegisterSaver_LiveVReg( Z_V24 ), 274 RegisterSaver_LiveVReg( Z_V25 ), 275 RegisterSaver_LiveVReg( Z_V26 ), 276 RegisterSaver_LiveVReg( Z_V27 ), 277 RegisterSaver_LiveVReg( Z_V28 ), 278 RegisterSaver_LiveVReg( Z_V29 ), 279 RegisterSaver_LiveVReg( Z_V30 ), 280 RegisterSaver_LiveVReg( Z_V31 ) 281 }; 282 283 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) { 284 int reg_space = -1; 285 switch (reg_set) { 286 case all_registers: reg_space = sizeof(RegisterSaver_LiveRegs); break; 287 case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break; 288 case all_integer_registers: reg_space = sizeof(RegisterSaver_LiveIntRegs); break; 289 case all_volatile_registers: reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break; 290 case arg_registers: reg_space = sizeof(RegisterSaver_LiveArgRegs); break; 291 default: ShouldNotReachHere(); 292 } 293 return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size; 294 } 295 296 int RegisterSaver::calculate_vregstosave_num() { 297 return (sizeof(RegisterSaver_LiveVRegs) / sizeof(RegisterSaver::LiveRegType)); 298 } 299 300 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) { 301 const int vregstosave_num = save_vectors ? 
calculate_vregstosave_num() : 0; 302 return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size; 303 } 304 305 306 // return_pc: Specify the register that should be stored as the return pc in the current frame. 307 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) { 308 // Record volatile registers as callee-save values in an OopMap so 309 // their save locations will be propagated to the caller frame's 310 // RegisterMap during StackFrameStream construction (needed for 311 // deoptimization; see compiledVFrame::create_stack_value). 312 313 // Calculate frame size. 314 const int frame_size_in_bytes = live_reg_frame_size(reg_set, save_vectors); 315 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); 316 const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0; 317 const int register_save_offset = frame_size_in_bytes - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size); 318 319 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. 320 OopMap* map = new OopMap(frame_size_in_slots, 0); 321 322 int regstosave_num = 0; 323 const RegisterSaver::LiveRegType* live_regs = nullptr; 324 325 switch (reg_set) { 326 case all_registers: 327 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType); 328 live_regs = RegisterSaver_LiveRegs; 329 break; 330 case all_registers_except_r2: 331 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; 332 live_regs = RegisterSaver_LiveRegsWithoutR2; 333 break; 334 case all_integer_registers: 335 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); 336 live_regs = RegisterSaver_LiveIntRegs; 337 break; 338 case all_volatile_registers: 339 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType); 340 live_regs = RegisterSaver_LiveVolatileRegs; 341 break; 342 case arg_registers: 343 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; 344 live_regs = RegisterSaver_LiveArgRegs; 345 break; 346 default: ShouldNotReachHere(); 347 } 348 349 // Save return pc in old frame. 350 __ save_return_pc(return_pc); 351 352 // Push a new frame (includes stack linkage). 353 // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are 354 // illegally used to pass parameters by RangeCheckStub::emit_code(). 355 __ push_frame(frame_size_in_bytes, return_pc); 356 // We have to restore return_pc right away. 357 // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14). 358 // Nobody else knows which register we saved. 359 __ z_lg(return_pc, _z_common_abi(return_pc) + frame_size_in_bytes, Z_SP); 360 361 // Register save area in new frame starts above z_abi_160 area. 
362 int offset = register_save_offset; 363 364 Register first = noreg; 365 Register last = noreg; 366 int first_offset = -1; 367 bool float_spilled = false; 368 369 for (int i = 0; i < regstosave_num; i++, offset += reg_size) { 370 int reg_num = live_regs[i].reg_num; 371 int reg_type = live_regs[i].reg_type; 372 373 switch (reg_type) { 374 case RegisterSaver::int_reg: { 375 Register reg = as_Register(reg_num); 376 if (last != reg->predecessor()) { 377 if (first != noreg) { 378 __ z_stmg(first, last, first_offset, Z_SP); 379 } 380 first = reg; 381 first_offset = offset; 382 DEBUG_ONLY(float_spilled = false); 383 } 384 last = reg; 385 assert(last != Z_R0, "r0 would require special treatment"); 386 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]"); 387 break; 388 } 389 390 case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot. 391 continue; // Continue with next loop iteration. 392 393 case RegisterSaver::float_reg: { 394 FloatRegister freg = as_FloatRegister(reg_num); 395 __ z_std(freg, offset, Z_SP); 396 DEBUG_ONLY(float_spilled = true); 397 break; 398 } 399 400 default: 401 ShouldNotReachHere(); 402 break; 403 } 404 405 // Second set_callee_saved is really a waste but we'll keep things as they were for now 406 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg); 407 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next()); 408 } 409 assert(first != noreg, "Should spill at least one int reg."); 410 __ z_stmg(first, last, first_offset, Z_SP); 411 412 for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) { 413 int reg_num = RegisterSaver_LiveVRegs[i].reg_num; 414 415 __ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset)); 416 417 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), 418 RegisterSaver_LiveVRegs[i].vmreg); 419 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2), 420 RegisterSaver_LiveVRegs[i].vmreg->next()); 421 map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2), 422 RegisterSaver_LiveVRegs[i].vmreg->next(2)); 423 map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2), 424 RegisterSaver_LiveVRegs[i].vmreg->next(3)); 425 } 426 427 assert(offset == frame_size_in_bytes, "consistency check"); 428 429 // And we're done. 430 return map; 431 } 432 433 434 // Generate the OopMap (again, regs where saved before). 435 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) { 436 // Calculate frame size. 437 const int frame_size_in_bytes = live_reg_frame_size(reg_set); 438 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); 439 const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set); 440 441 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words. 
442 OopMap* map = new OopMap(frame_size_in_slots, 0); 443 444 int regstosave_num = 0; 445 const RegisterSaver::LiveRegType* live_regs = nullptr; 446 447 switch (reg_set) { 448 case all_registers: 449 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType); 450 live_regs = RegisterSaver_LiveRegs; 451 break; 452 case all_registers_except_r2: 453 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; 454 live_regs = RegisterSaver_LiveRegsWithoutR2; 455 break; 456 case all_integer_registers: 457 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); 458 live_regs = RegisterSaver_LiveIntRegs; 459 break; 460 case all_volatile_registers: 461 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType); 462 live_regs = RegisterSaver_LiveVolatileRegs; 463 break; 464 case arg_registers: 465 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; 466 live_regs = RegisterSaver_LiveArgRegs; 467 break; 468 default: ShouldNotReachHere(); 469 } 470 471 // Register save area in new frame starts above z_abi_160 area. 472 int offset = register_save_offset; 473 for (int i = 0; i < regstosave_num; i++) { 474 if (live_regs[i].reg_type < RegisterSaver::excluded_reg) { 475 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg); 476 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next()); 477 } 478 offset += reg_size; 479 } 480 #ifdef ASSERT 481 assert(offset == frame_size_in_bytes, "consistency check"); 482 #endif 483 return map; 484 } 485 486 487 // Pop the current frame and restore all the registers that we saved. 488 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) { 489 int offset; 490 const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0; 491 const int register_save_offset = live_reg_frame_size(reg_set, save_vectors) - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size); 492 493 Register first = noreg; 494 Register last = noreg; 495 int first_offset = -1; 496 bool float_spilled = false; 497 498 int regstosave_num = 0; 499 const RegisterSaver::LiveRegType* live_regs = nullptr; 500 501 switch (reg_set) { 502 case all_registers: 503 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);; 504 live_regs = RegisterSaver_LiveRegs; 505 break; 506 case all_registers_except_r2: 507 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);; 508 live_regs = RegisterSaver_LiveRegsWithoutR2; 509 break; 510 case all_integer_registers: 511 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType); 512 live_regs = RegisterSaver_LiveIntRegs; 513 break; 514 case all_volatile_registers: 515 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);; 516 live_regs = RegisterSaver_LiveVolatileRegs; 517 break; 518 case arg_registers: 519 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);; 520 live_regs = RegisterSaver_LiveArgRegs; 521 break; 522 default: ShouldNotReachHere(); 523 } 524 525 // Restore all registers (ints and floats). 526 527 // Register save area in new frame starts above z_abi_160 area. 
528 offset = register_save_offset; 529 530 for (int i = 0; i < regstosave_num; i++, offset += reg_size) { 531 int reg_num = live_regs[i].reg_num; 532 int reg_type = live_regs[i].reg_type; 533 534 switch (reg_type) { 535 case RegisterSaver::excluded_reg: 536 continue; // Continue with next loop iteration. 537 538 case RegisterSaver::int_reg: { 539 Register reg = as_Register(reg_num); 540 if (last != reg->predecessor()) { 541 if (first != noreg) { 542 __ z_lmg(first, last, first_offset, Z_SP); 543 } 544 first = reg; 545 first_offset = offset; 546 DEBUG_ONLY(float_spilled = false); 547 } 548 last = reg; 549 assert(last != Z_R0, "r0 would require special treatment"); 550 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]"); 551 break; 552 } 553 554 case RegisterSaver::float_reg: { 555 FloatRegister freg = as_FloatRegister(reg_num); 556 __ z_ld(freg, offset, Z_SP); 557 DEBUG_ONLY(float_spilled = true); 558 break; 559 } 560 561 default: 562 ShouldNotReachHere(); 563 } 564 } 565 assert(first != noreg, "Should spill at least one int reg."); 566 __ z_lmg(first, last, first_offset, Z_SP); 567 568 for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) { 569 int reg_num = RegisterSaver_LiveVRegs[i].reg_num; 570 571 __ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset)); 572 } 573 574 // Pop the frame. 575 __ pop_frame(); 576 577 // Restore the flags. 578 __ restore_return_pc(); 579 } 580 581 582 // Pop the current frame and restore the registers that might be holding a result. 583 void RegisterSaver::restore_result_registers(MacroAssembler* masm) { 584 const int regstosave_num = sizeof(RegisterSaver_LiveRegs) / 585 sizeof(RegisterSaver::LiveRegType); 586 const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers); 587 588 // Restore all result registers (ints and floats). 589 int offset = register_save_offset; 590 for (int i = 0; i < regstosave_num; i++, offset += reg_size) { 591 int reg_num = RegisterSaver_LiveRegs[i].reg_num; 592 int reg_type = RegisterSaver_LiveRegs[i].reg_type; 593 switch (reg_type) { 594 case RegisterSaver::excluded_reg: 595 continue; // Continue with next loop iteration. 596 case RegisterSaver::int_reg: { 597 if (as_Register(reg_num) == Z_RET) { // int result_reg 598 __ z_lg(as_Register(reg_num), offset, Z_SP); 599 } 600 break; 601 } 602 case RegisterSaver::float_reg: { 603 if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg 604 __ z_ld(as_FloatRegister(reg_num), offset, Z_SP); 605 } 606 break; 607 } 608 default: 609 ShouldNotReachHere(); 610 } 611 } 612 assert(offset == live_reg_frame_size(all_registers), "consistency check"); 613 } 614 615 // --------------------------------------------------------------------------- 616 void SharedRuntime::save_native_result(MacroAssembler * masm, 617 BasicType ret_type, 618 int frame_slots) { 619 Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size); 620 621 switch (ret_type) { 622 case T_BOOLEAN: // Save shorter types as int. Do we need sign extension at restore?? 623 case T_BYTE: 624 case T_CHAR: 625 case T_SHORT: 626 case T_INT: 627 __ reg2mem_opt(Z_RET, memaddr, false); 628 break; 629 case T_OBJECT: // Save pointer types as long. 
630 case T_ARRAY: 631 case T_ADDRESS: 632 case T_VOID: 633 case T_LONG: 634 __ reg2mem_opt(Z_RET, memaddr); 635 break; 636 case T_FLOAT: 637 __ freg2mem_opt(Z_FRET, memaddr, false); 638 break; 639 case T_DOUBLE: 640 __ freg2mem_opt(Z_FRET, memaddr); 641 break; 642 default: 643 ShouldNotReachHere(); 644 break; 645 } 646 } 647 648 void SharedRuntime::restore_native_result(MacroAssembler *masm, 649 BasicType ret_type, 650 int frame_slots) { 651 Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size); 652 653 switch (ret_type) { 654 case T_BOOLEAN: // Restore shorter types as int. Do we need sign extension at restore?? 655 case T_BYTE: 656 case T_CHAR: 657 case T_SHORT: 658 case T_INT: 659 __ mem2reg_opt(Z_RET, memaddr, false); 660 break; 661 case T_OBJECT: // Restore pointer types as long. 662 case T_ARRAY: 663 case T_ADDRESS: 664 case T_VOID: 665 case T_LONG: 666 __ mem2reg_opt(Z_RET, memaddr); 667 break; 668 case T_FLOAT: 669 __ mem2freg_opt(Z_FRET, memaddr, false); 670 break; 671 case T_DOUBLE: 672 __ mem2freg_opt(Z_FRET, memaddr); 673 break; 674 default: 675 ShouldNotReachHere(); 676 break; 677 } 678 } 679 680 // --------------------------------------------------------------------------- 681 // Read the array of BasicTypes from a signature, and compute where the 682 // arguments should go. Values in the VMRegPair regs array refer to 4-byte 683 // quantities. Values less than VMRegImpl::stack0 are registers, those above 684 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer 685 // as framesizes are fixed. 686 // VMRegImpl::stack0 refers to the first slot 0(sp). 687 // VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers 688 // up to Register::number_of_registers are the 64-bit integer registers. 689 690 // Note: the INPUTS in sig_bt are in units of Java argument words, which are 691 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit 692 // units regardless of build. 693 694 // The Java calling convention is a "shifted" version of the C ABI. 695 // By skipping the first C ABI register we can call non-static jni methods 696 // with small numbers of arguments without having to shuffle the arguments 697 // at all. Since we control the java ABI we ought to at least get some 698 // advantage out of it. 699 int SharedRuntime::java_calling_convention(const BasicType *sig_bt, 700 VMRegPair *regs, 701 int total_args_passed) { 702 // c2c calling conventions for compiled-compiled calls. 703 704 // An int/float occupies 1 slot here. 705 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats. 706 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles. 
707 708 const VMReg z_iarg_reg[5] = { 709 Z_R2->as_VMReg(), 710 Z_R3->as_VMReg(), 711 Z_R4->as_VMReg(), 712 Z_R5->as_VMReg(), 713 Z_R6->as_VMReg() 714 }; 715 const VMReg z_farg_reg[4] = { 716 Z_F0->as_VMReg(), 717 Z_F2->as_VMReg(), 718 Z_F4->as_VMReg(), 719 Z_F6->as_VMReg() 720 }; 721 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]); 722 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]); 723 724 assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch"); 725 assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch"); 726 727 int i; 728 int stk = 0; 729 int ireg = 0; 730 int freg = 0; 731 732 for (int i = 0; i < total_args_passed; ++i) { 733 switch (sig_bt[i]) { 734 case T_BOOLEAN: 735 case T_CHAR: 736 case T_BYTE: 737 case T_SHORT: 738 case T_INT: 739 if (ireg < z_num_iarg_registers) { 740 // Put int/ptr in register. 741 regs[i].set1(z_iarg_reg[ireg]); 742 ++ireg; 743 } else { 744 // Put int/ptr on stack. 745 regs[i].set1(VMRegImpl::stack2reg(stk)); 746 stk += inc_stk_for_intfloat; 747 } 748 break; 749 case T_LONG: 750 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 751 if (ireg < z_num_iarg_registers) { 752 // Put long in register. 753 regs[i].set2(z_iarg_reg[ireg]); 754 ++ireg; 755 } else { 756 // Put long on stack and align to 2 slots. 757 if (stk & 0x1) { ++stk; } 758 regs[i].set2(VMRegImpl::stack2reg(stk)); 759 stk += inc_stk_for_longdouble; 760 } 761 break; 762 case T_OBJECT: 763 case T_ARRAY: 764 case T_ADDRESS: 765 if (ireg < z_num_iarg_registers) { 766 // Put ptr in register. 767 regs[i].set2(z_iarg_reg[ireg]); 768 ++ireg; 769 } else { 770 // Put ptr on stack and align to 2 slots, because 771 // "64-bit pointers record oop-ishness on 2 aligned adjacent 772 // registers." (see OopFlow::build_oop_map). 773 if (stk & 0x1) { ++stk; } 774 regs[i].set2(VMRegImpl::stack2reg(stk)); 775 stk += inc_stk_for_longdouble; 776 } 777 break; 778 case T_FLOAT: 779 if (freg < z_num_farg_registers) { 780 // Put float in register. 781 regs[i].set1(z_farg_reg[freg]); 782 ++freg; 783 } else { 784 // Put float on stack. 785 regs[i].set1(VMRegImpl::stack2reg(stk)); 786 stk += inc_stk_for_intfloat; 787 } 788 break; 789 case T_DOUBLE: 790 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 791 if (freg < z_num_farg_registers) { 792 // Put double in register. 793 regs[i].set2(z_farg_reg[freg]); 794 ++freg; 795 } else { 796 // Put double on stack and align to 2 slots. 797 if (stk & 0x1) { ++stk; } 798 regs[i].set2(VMRegImpl::stack2reg(stk)); 799 stk += inc_stk_for_longdouble; 800 } 801 break; 802 case T_VOID: 803 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); 804 // Do not count halves. 805 regs[i].set_bad(); 806 break; 807 default: 808 ShouldNotReachHere(); 809 } 810 } 811 return stk; 812 } 813 814 int SharedRuntime::c_calling_convention(const BasicType *sig_bt, 815 VMRegPair *regs, 816 int total_args_passed) { 817 818 // Calling conventions for C runtime calls and calls to JNI native methods. 
819 const VMReg z_iarg_reg[5] = { 820 Z_R2->as_VMReg(), 821 Z_R3->as_VMReg(), 822 Z_R4->as_VMReg(), 823 Z_R5->as_VMReg(), 824 Z_R6->as_VMReg() 825 }; 826 const VMReg z_farg_reg[4] = { 827 Z_F0->as_VMReg(), 828 Z_F2->as_VMReg(), 829 Z_F4->as_VMReg(), 830 Z_F6->as_VMReg() 831 }; 832 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]); 833 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]); 834 835 // Check calling conventions consistency. 836 assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch"); 837 assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch"); 838 839 // Avoid passing C arguments in the wrong stack slots. 840 841 // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy 842 // 2 such slots, like 64 bit values do. 843 const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats. 844 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles. 845 846 int i; 847 // Leave room for C-compatible ABI 848 int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size; 849 int freg = 0; 850 int ireg = 0; 851 852 // We put the first 5 arguments into registers and the rest on the 853 // stack. Float arguments are already in their argument registers 854 // due to c2c calling conventions (see calling_convention). 855 for (int i = 0; i < total_args_passed; ++i) { 856 switch (sig_bt[i]) { 857 case T_BOOLEAN: 858 case T_CHAR: 859 case T_BYTE: 860 case T_SHORT: 861 case T_INT: 862 // Fall through, handle as long. 863 case T_LONG: 864 case T_OBJECT: 865 case T_ARRAY: 866 case T_ADDRESS: 867 case T_METADATA: 868 // Oops are already boxed if required (JNI). 869 if (ireg < z_num_iarg_registers) { 870 regs[i].set2(z_iarg_reg[ireg]); 871 ++ireg; 872 } else { 873 regs[i].set2(VMRegImpl::stack2reg(stk)); 874 stk += inc_stk_for_longdouble; 875 } 876 break; 877 case T_FLOAT: 878 if (freg < z_num_farg_registers) { 879 regs[i].set1(z_farg_reg[freg]); 880 ++freg; 881 } else { 882 regs[i].set1(VMRegImpl::stack2reg(stk+1)); 883 stk += inc_stk_for_intfloat; 884 } 885 break; 886 case T_DOUBLE: 887 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half"); 888 if (freg < z_num_farg_registers) { 889 regs[i].set2(z_farg_reg[freg]); 890 ++freg; 891 } else { 892 // Put double on stack. 893 regs[i].set2(VMRegImpl::stack2reg(stk)); 894 stk += inc_stk_for_longdouble; 895 } 896 break; 897 case T_VOID: 898 // Do not count halves. 899 regs[i].set_bad(); 900 break; 901 default: 902 ShouldNotReachHere(); 903 } 904 } 905 return align_up(stk, 2); 906 } 907 908 int SharedRuntime::vector_calling_convention(VMRegPair *regs, 909 uint num_bits, 910 uint total_args_passed) { 911 Unimplemented(); 912 return 0; 913 } 914 915 //////////////////////////////////////////////////////////////////////// 916 // 917 // Argument shufflers 918 // 919 //////////////////////////////////////////////////////////////////////// 920 921 //---------------------------------------------------------------------- 922 // The java_calling_convention describes stack locations as ideal slots on 923 // a frame with no abi restrictions. Since we must observe abi restrictions 924 // (like the placement of the register window) the slots must be biased by 925 // the following value. 
926 //---------------------------------------------------------------------- 927 static int reg2slot(VMReg r) { 928 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots(); 929 } 930 931 static int reg2offset(VMReg r) { 932 return reg2slot(r) * VMRegImpl::stack_slot_size; 933 } 934 935 static void verify_oop_args(MacroAssembler *masm, 936 int total_args_passed, 937 const BasicType *sig_bt, 938 const VMRegPair *regs) { 939 if (!VerifyOops) { return; } 940 941 for (int i = 0; i < total_args_passed; i++) { 942 if (is_reference_type(sig_bt[i])) { 943 VMReg r = regs[i].first(); 944 assert(r->is_valid(), "bad oop arg"); 945 946 if (r->is_stack()) { 947 __ z_lg(Z_R0_scratch, 948 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); 949 __ verify_oop(Z_R0_scratch, FILE_AND_LINE); 950 } else { 951 __ verify_oop(r->as_Register(), FILE_AND_LINE); 952 } 953 } 954 } 955 } 956 957 static void gen_special_dispatch(MacroAssembler *masm, 958 int total_args_passed, 959 vmIntrinsics::ID special_dispatch, 960 const BasicType *sig_bt, 961 const VMRegPair *regs) { 962 verify_oop_args(masm, total_args_passed, sig_bt, regs); 963 964 // Now write the args into the outgoing interpreter space. 965 bool has_receiver = false; 966 Register receiver_reg = noreg; 967 int member_arg_pos = -1; 968 Register member_reg = noreg; 969 int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch); 970 971 if (ref_kind != 0) { 972 member_arg_pos = total_args_passed - 1; // trailing MemberName argument 973 member_reg = Z_R9; // Known to be free at this point. 974 has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); 975 } else if (special_dispatch == vmIntrinsics::_linkToNative) { 976 member_arg_pos = total_args_passed - 1; // trailing NativeEntryPoint argument 977 member_reg = Z_R9; // known to be free at this point 978 } else { 979 guarantee(special_dispatch == vmIntrinsics::_invokeBasic, 980 "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch)); 981 has_receiver = true; 982 } 983 984 if (member_reg != noreg) { 985 // Load the member_arg into register, if necessary. 986 assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob"); 987 assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object"); 988 989 VMReg r = regs[member_arg_pos].first(); 990 assert(r->is_valid(), "bad member arg"); 991 992 if (r->is_stack()) { 993 __ z_lg(member_reg, Address(Z_SP, reg2offset(r))); 994 } else { 995 // No data motion is needed. 996 member_reg = r->as_Register(); 997 } 998 } 999 1000 if (has_receiver) { 1001 // Make sure the receiver is loaded into a register. 1002 assert(total_args_passed > 0, "oob"); 1003 assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); 1004 1005 VMReg r = regs[0].first(); 1006 assert(r->is_valid(), "bad receiver arg"); 1007 1008 if (r->is_stack()) { 1009 // Porting note: This assumes that compiled calling conventions always 1010 // pass the receiver oop in a register. If this is not true on some 1011 // platform, pick a temp and load the receiver from stack. 1012 assert(false, "receiver always in a register"); 1013 receiver_reg = Z_R13; // Known to be free at this point. 1014 __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r))); 1015 } else { 1016 // No data motion is needed. 
1017 receiver_reg = r->as_Register(); 1018 } 1019 } 1020 1021 // Figure out which address we are really jumping to: 1022 MethodHandles::generate_method_handle_dispatch(masm, special_dispatch, 1023 receiver_reg, member_reg, 1024 /*for_compiler_entry:*/ true); 1025 } 1026 1027 //////////////////////////////////////////////////////////////////////// 1028 // 1029 // Argument shufflers 1030 // 1031 //////////////////////////////////////////////////////////////////////// 1032 1033 // Is the size of a vector size (in bytes) bigger than a size saved by default? 1034 // 8 bytes registers are saved by default on z/Architecture. 1035 bool SharedRuntime::is_wide_vector(int size) { 1036 // Note, MaxVectorSize == 8/16 on this platform. 1037 assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size); 1038 return size > 8; 1039 } 1040 1041 //---------------------------------------------------------------------- 1042 // An oop arg. Must pass a handle not the oop itself 1043 //---------------------------------------------------------------------- 1044 static void object_move(MacroAssembler *masm, 1045 OopMap *map, 1046 int oop_handle_offset, 1047 int framesize_in_slots, 1048 VMRegPair src, 1049 VMRegPair dst, 1050 bool is_receiver, 1051 int *receiver_offset) { 1052 int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; 1053 1054 assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please."); 1055 1056 // Must pass a handle. First figure out the location we use as a handle. 1057 1058 if (src.first()->is_stack()) { 1059 // Oop is already on the stack, put handle on stack or in register 1060 // If handle will be on the stack, use temp reg to calculate it. 1061 Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register(); 1062 Label skip; 1063 int slot_in_older_frame = reg2slot(src.first()); 1064 1065 guarantee(!is_receiver, "expecting receiver in register"); 1066 map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots)); 1067 1068 __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP); 1069 __ load_and_test_long(Z_R0, Address(rHandle)); 1070 __ z_brne(skip); 1071 // Use a null handle if oop is null. 1072 __ clear_reg(rHandle, true, false); 1073 __ bind(skip); 1074 1075 // Copy handle to the right place (register or stack). 1076 if (dst.first()->is_stack()) { 1077 __ z_stg(rHandle, reg2offset(dst.first()), Z_SP); 1078 } // else 1079 // nothing to do. rHandle uses the correct register 1080 } else { 1081 // Oop is passed in an input register. We must flush it to the stack. 1082 const Register rOop = src.first()->as_Register(); 1083 const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register(); 1084 int oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; 1085 int oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size; 1086 NearLabel skip; 1087 1088 if (is_receiver) { 1089 *receiver_offset = oop_slot_offset; 1090 } 1091 map->set_oop(VMRegImpl::stack2reg(oop_slot)); 1092 1093 // Flush Oop to stack, calculate handle. 1094 __ z_stg(rOop, oop_slot_offset, Z_SP); 1095 __ add2reg(rHandle, oop_slot_offset, Z_SP); 1096 1097 // If Oop is null, use a null handle. 1098 __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip); 1099 __ clear_reg(rHandle, true, false); 1100 __ bind(skip); 1101 1102 // Copy handle to the right place (register or stack). 
1103 if (dst.first()->is_stack()) { 1104 __ z_stg(rHandle, reg2offset(dst.first()), Z_SP); 1105 } // else 1106 // nothing to do here, since rHandle = dst.first()->as_Register in this case. 1107 } 1108 } 1109 1110 //---------------------------------------------------------------------- 1111 // A float arg. May have to do float reg to int reg conversion 1112 //---------------------------------------------------------------------- 1113 static void float_move(MacroAssembler *masm, 1114 VMRegPair src, 1115 VMRegPair dst, 1116 int framesize_in_slots, 1117 int workspace_slot_offset) { 1118 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size; 1119 int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size; 1120 1121 // We do not accept an argument in a VMRegPair to be spread over two slots, 1122 // no matter what physical location (reg or stack) the slots may have. 1123 // We just check for the unaccepted slot to be invalid. 1124 assert(!src.second()->is_valid(), "float in arg spread over two slots"); 1125 assert(!dst.second()->is_valid(), "float out arg spread over two slots"); 1126 1127 if (src.first()->is_stack()) { 1128 if (dst.first()->is_stack()) { 1129 // stack -> stack. The easiest of the bunch. 1130 __ z_mvc(Address(Z_SP, reg2offset(dst.first())), 1131 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float)); 1132 } else { 1133 // stack to reg 1134 Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset); 1135 if (dst.first()->is_Register()) { 1136 __ mem2reg_opt(dst.first()->as_Register(), memaddr, false); 1137 } else { 1138 __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false); 1139 } 1140 } 1141 } else if (src.first()->is_Register()) { 1142 if (dst.first()->is_stack()) { 1143 // gpr -> stack 1144 __ reg2mem_opt(src.first()->as_Register(), 1145 Address(Z_SP, reg2offset(dst.first()), false )); 1146 } else { 1147 if (dst.first()->is_Register()) { 1148 // gpr -> gpr 1149 __ move_reg_if_needed(dst.first()->as_Register(), T_INT, 1150 src.first()->as_Register(), T_INT); 1151 } else { 1152 if (VM_Version::has_FPSupportEnhancements()) { 1153 // gpr -> fpr. Exploit z10 capability of direct transfer. 1154 __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register()); 1155 } else { 1156 // gpr -> fpr. Use work space on stack to transfer data. 1157 Address stackaddr(Z_SP, workspace_offset); 1158 1159 __ reg2mem_opt(src.first()->as_Register(), stackaddr, false); 1160 __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false); 1161 } 1162 } 1163 } 1164 } else { 1165 if (dst.first()->is_stack()) { 1166 // fpr -> stack 1167 __ freg2mem_opt(src.first()->as_FloatRegister(), 1168 Address(Z_SP, reg2offset(dst.first())), false); 1169 } else { 1170 if (dst.first()->is_Register()) { 1171 if (VM_Version::has_FPSupportEnhancements()) { 1172 // fpr -> gpr. 1173 __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister()); 1174 } else { 1175 // fpr -> gpr. Use work space on stack to transfer data. 1176 Address stackaddr(Z_SP, workspace_offset); 1177 1178 __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false); 1179 __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false); 1180 } 1181 } else { 1182 // fpr -> fpr 1183 __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT, 1184 src.first()->as_FloatRegister(), T_FLOAT); 1185 } 1186 } 1187 } 1188 } 1189 1190 //---------------------------------------------------------------------- 1191 // A double arg. 
May have to do double reg to long reg conversion 1192 //---------------------------------------------------------------------- 1193 static void double_move(MacroAssembler *masm, 1194 VMRegPair src, 1195 VMRegPair dst, 1196 int framesize_in_slots, 1197 int workspace_slot_offset) { 1198 int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; 1199 int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size; 1200 1201 // Since src is always a java calling convention we know that the 1202 // src pair is always either all registers or all stack (and aligned?) 1203 1204 if (src.first()->is_stack()) { 1205 if (dst.first()->is_stack()) { 1206 // stack -> stack. The easiest of the bunch. 1207 __ z_mvc(Address(Z_SP, reg2offset(dst.first())), 1208 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double)); 1209 } else { 1210 // stack to reg 1211 Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset); 1212 1213 if (dst.first()->is_Register()) { 1214 __ mem2reg_opt(dst.first()->as_Register(), stackaddr); 1215 } else { 1216 __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr); 1217 } 1218 } 1219 } else if (src.first()->is_Register()) { 1220 if (dst.first()->is_stack()) { 1221 // gpr -> stack 1222 __ reg2mem_opt(src.first()->as_Register(), 1223 Address(Z_SP, reg2offset(dst.first()))); 1224 } else { 1225 if (dst.first()->is_Register()) { 1226 // gpr -> gpr 1227 __ move_reg_if_needed(dst.first()->as_Register(), T_LONG, 1228 src.first()->as_Register(), T_LONG); 1229 } else { 1230 if (VM_Version::has_FPSupportEnhancements()) { 1231 // gpr -> fpr. Exploit z10 capability of direct transfer. 1232 __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register()); 1233 } else { 1234 // gpr -> fpr. Use work space on stack to transfer data. 1235 Address stackaddr(Z_SP, workspace_offset); 1236 __ reg2mem_opt(src.first()->as_Register(), stackaddr); 1237 __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr); 1238 } 1239 } 1240 } 1241 } else { 1242 if (dst.first()->is_stack()) { 1243 // fpr -> stack 1244 __ freg2mem_opt(src.first()->as_FloatRegister(), 1245 Address(Z_SP, reg2offset(dst.first()))); 1246 } else { 1247 if (dst.first()->is_Register()) { 1248 if (VM_Version::has_FPSupportEnhancements()) { 1249 // fpr -> gpr. Exploit z10 capability of direct transfer. 1250 __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister()); 1251 } else { 1252 // fpr -> gpr. Use work space on stack to transfer data. 1253 Address stackaddr(Z_SP, workspace_offset); 1254 1255 __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr); 1256 __ mem2reg_opt(dst.first()->as_Register(), stackaddr); 1257 } 1258 } else { 1259 // fpr -> fpr 1260 // In theory these overlap but the ordering is such that this is likely a nop. 1261 __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE, 1262 src.first()->as_FloatRegister(), T_DOUBLE); 1263 } 1264 } 1265 } 1266 } 1267 1268 //---------------------------------------------------------------------- 1269 // A long arg. 1270 //---------------------------------------------------------------------- 1271 static void long_move(MacroAssembler *masm, 1272 VMRegPair src, 1273 VMRegPair dst, 1274 int framesize_in_slots) { 1275 int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size; 1276 1277 if (src.first()->is_stack()) { 1278 if (dst.first()->is_stack()) { 1279 // stack -> stack. The easiest of the bunch. 
1280 __ z_mvc(Address(Z_SP, reg2offset(dst.first())), 1281 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long)); 1282 } else { 1283 // stack to reg 1284 assert(dst.first()->is_Register(), "long dst value must be in GPR"); 1285 __ mem2reg_opt(dst.first()->as_Register(), 1286 Address(Z_SP, reg2offset(src.first()) + frame_offset)); 1287 } 1288 } else { 1289 // reg to reg 1290 assert(src.first()->is_Register(), "long src value must be in GPR"); 1291 if (dst.first()->is_stack()) { 1292 // reg -> stack 1293 __ reg2mem_opt(src.first()->as_Register(), 1294 Address(Z_SP, reg2offset(dst.first()))); 1295 } else { 1296 // reg -> reg 1297 assert(dst.first()->is_Register(), "long dst value must be in GPR"); 1298 __ move_reg_if_needed(dst.first()->as_Register(), 1299 T_LONG, src.first()->as_Register(), T_LONG); 1300 } 1301 } 1302 } 1303 1304 1305 //---------------------------------------------------------------------- 1306 // A int-like arg. 1307 //---------------------------------------------------------------------- 1308 // On z/Architecture we will store integer like items to the stack as 64 bit 1309 // items, according to the z/Architecture ABI, even though Java would only store 1310 // 32 bits for a parameter. 1311 // We do sign extension for all base types. That is ok since the only 1312 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int. 1313 // Sign extension 32->64 bit will thus not affect the value. 1314 //---------------------------------------------------------------------- 1315 static void move32_64(MacroAssembler *masm, 1316 VMRegPair src, 1317 VMRegPair dst, 1318 int framesize_in_slots) { 1319 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size; 1320 1321 if (src.first()->is_stack()) { 1322 Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset); 1323 if (dst.first()->is_stack()) { 1324 // stack -> stack. MVC not possible due to sign extension. 1325 Address firstaddr(Z_SP, reg2offset(dst.first())); 1326 __ mem2reg_signed_opt(Z_R0_scratch, memaddr); 1327 __ reg2mem_opt(Z_R0_scratch, firstaddr); 1328 } else { 1329 // stack -> reg, sign extended 1330 __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr); 1331 } 1332 } else { 1333 if (dst.first()->is_stack()) { 1334 // reg -> stack, sign extended 1335 Address firstaddr(Z_SP, reg2offset(dst.first())); 1336 __ z_lgfr(src.first()->as_Register(), src.first()->as_Register()); 1337 __ reg2mem_opt(src.first()->as_Register(), firstaddr); 1338 } else { 1339 // reg -> reg, sign extended 1340 __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register()); 1341 } 1342 } 1343 } 1344 1345 //---------------------------------------------------------------------- 1346 // Wrap a JNI call. 1347 //---------------------------------------------------------------------- 1348 #undef USE_RESIZE_FRAME 1349 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm, 1350 const methodHandle& method, 1351 int compile_id, 1352 BasicType *in_sig_bt, 1353 VMRegPair *in_regs, 1354 BasicType ret_type) { 1355 int total_in_args = method->size_of_parameters(); 1356 if (method->is_method_handle_intrinsic()) { 1357 vmIntrinsics::ID iid = method->intrinsic_id(); 1358 intptr_t start = (intptr_t) __ pc(); 1359 int vep_offset = ((intptr_t) __ pc()) - start; 1360 1361 gen_special_dispatch(masm, total_in_args, 1362 method->intrinsic_id(), in_sig_bt, in_regs); 1363 1364 int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period. 
1365 1366 __ flush(); 1367 1368 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // No out slots at all, actually. 1369 1370 return nmethod::new_native_nmethod(method, 1371 compile_id, 1372 masm->code(), 1373 vep_offset, 1374 frame_complete, 1375 stack_slots / VMRegImpl::slots_per_word, 1376 in_ByteSize(-1), 1377 in_ByteSize(-1), 1378 (OopMapSet *) nullptr); 1379 } 1380 1381 1382 /////////////////////////////////////////////////////////////////////// 1383 // 1384 // Precalculations before generating any code 1385 // 1386 /////////////////////////////////////////////////////////////////////// 1387 1388 address native_func = method->native_function(); 1389 assert(native_func != nullptr, "must have function"); 1390 1391 //--------------------------------------------------------------------- 1392 // We have received a description of where all the java args are located 1393 // on entry to the wrapper. We need to convert these args to where 1394 // the jni function will expect them. To figure out where they go 1395 // we convert the java signature to a C signature by inserting 1396 // the hidden arguments as arg[0] and possibly arg[1] (static method). 1397 // 1398 // The first hidden argument arg[0] is a pointer to the JNI environment. 1399 // It is generated for every call. 1400 // The second argument arg[1] to the JNI call, which is hidden for static 1401 // methods, is the boxed lock object. For static calls, the lock object 1402 // is the static method itself. The oop is constructed here. for instance 1403 // calls, the lock is performed on the object itself, the pointer of 1404 // which is passed as the first visible argument. 1405 //--------------------------------------------------------------------- 1406 1407 // Additionally, on z/Architecture we must convert integers 1408 // to longs in the C signature. We do this in advance in order to have 1409 // no trouble with indexes into the bt-arrays. 1410 // So convert the signature and registers now, and adjust the total number 1411 // of in-arguments accordingly. 1412 bool method_is_static = method->is_static(); 1413 int total_c_args = total_in_args + (method_is_static ? 2 : 1); 1414 1415 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); 1416 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); 1417 1418 // Create the signature for the C call: 1419 // 1) add the JNIEnv* 1420 // 2) add the class if the method is static 1421 // 3) copy the rest of the incoming signature (shifted by the number of 1422 // hidden arguments) 1423 1424 int argc = 0; 1425 out_sig_bt[argc++] = T_ADDRESS; 1426 if (method->is_static()) { 1427 out_sig_bt[argc++] = T_OBJECT; 1428 } 1429 1430 for (int i = 0; i < total_in_args; i++) { 1431 out_sig_bt[argc++] = in_sig_bt[i]; 1432 } 1433 1434 /////////////////////////////////////////////////////////////////////// 1435 // Now figure out where the args must be stored and how much stack space 1436 // they require (neglecting out_preserve_stack_slots but providing space 1437 // for storing the first five register arguments). 1438 // It's weird, see int_stk_helper. 1439 /////////////////////////////////////////////////////////////////////// 1440 1441 //--------------------------------------------------------------------- 1442 // Compute framesize for the wrapper. 1443 // 1444 // - We need to handlize all oops passed in registers. 1445 // - We must create space for them here that is disjoint from the save area. 1446 // - We always just allocate 5 words for storing down these object. 
1447 // This allows us to simply record the base and use the Ireg number to 1448 // decide which slot to use. 1449 // - Note that the reg number used to index the stack slot is the inbound 1450 // number, not the outbound number. 1451 // - We must shuffle args to match the native convention, 1452 // and to include var-args space. 1453 //--------------------------------------------------------------------- 1454 1455 //--------------------------------------------------------------------- 1456 // Calculate the total number of stack slots we will need: 1457 // - 1) abi requirements 1458 // - 2) outgoing args 1459 // - 3) space for inbound oop handle area 1460 // - 4) space for handlizing a klass if static method 1461 // - 5) space for a lock if synchronized method 1462 // - 6) workspace (save rtn value, int<->float reg moves, ...) 1463 // - 7) filler slots for alignment 1464 //--------------------------------------------------------------------- 1465 // Here is how the space we have allocated will look like. 1466 // Since we use resize_frame, we do not create a new stack frame, 1467 // but just extend the one we got with our own data area. 1468 // 1469 // If an offset or pointer name points to a separator line, it is 1470 // assumed that addressing with offset 0 selects storage starting 1471 // at the first byte above the separator line. 1472 // 1473 // 1474 // ... ... 1475 // | caller's frame | 1476 // FP-> |---------------------| 1477 // | filler slots, if any| 1478 // 7| #slots == mult of 2 | 1479 // |---------------------| 1480 // | work space | 1481 // 6| 2 slots = 8 bytes | 1482 // |---------------------| 1483 // 5| lock box (if sync) | 1484 // |---------------------| <- lock_slot_offset 1485 // 4| klass (if static) | 1486 // |---------------------| <- klass_slot_offset 1487 // 3| oopHandle area | 1488 // | | 1489 // | | 1490 // |---------------------| <- oop_handle_offset 1491 // 2| outbound memory | 1492 // ... ... 1493 // | based arguments | 1494 // |---------------------| 1495 // | vararg | 1496 // ... ... 1497 // | area | 1498 // |---------------------| <- out_arg_slot_offset 1499 // 1| out_preserved_slots | 1500 // ... ... 1501 // | (z_abi spec) | 1502 // SP-> |---------------------| <- FP_slot_offset (back chain) 1503 // ... ... 1504 // 1505 //--------------------------------------------------------------------- 1506 1507 // *_slot_offset indicates offset from SP in #stack slots 1508 // *_offset indicates offset from SP in #bytes 1509 1510 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2 1511 SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention 1512 1513 // Now the space for the inbound oop handle area. 
1514 int total_save_slots = Register::number_of_arg_registers * VMRegImpl::slots_per_word; 1515 1516 int oop_handle_slot_offset = stack_slots; 1517 stack_slots += total_save_slots; // 3) 1518 1519 int klass_slot_offset = 0; 1520 int klass_offset = -1; 1521 if (method_is_static) { // 4) 1522 klass_slot_offset = stack_slots; 1523 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; 1524 stack_slots += VMRegImpl::slots_per_word; 1525 } 1526 1527 int lock_slot_offset = 0; 1528 int lock_offset = -1; 1529 if (method->is_synchronized()) { // 5) 1530 lock_slot_offset = stack_slots; 1531 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size; 1532 stack_slots += VMRegImpl::slots_per_word; 1533 } 1534 1535 int workspace_slot_offset= stack_slots; // 6) 1536 stack_slots += 2; 1537 1538 // Now compute actual number of stack words we need. 1539 // Round to align stack properly. 1540 stack_slots = align_up(stack_slots, // 7) 1541 frame::alignment_in_bytes / VMRegImpl::stack_slot_size); 1542 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size; 1543 1544 1545 /////////////////////////////////////////////////////////////////////// 1546 // Now we can start generating code 1547 /////////////////////////////////////////////////////////////////////// 1548 1549 unsigned int wrapper_CodeStart = __ offset(); 1550 unsigned int wrapper_UEPStart; 1551 unsigned int wrapper_VEPStart; 1552 unsigned int wrapper_FrameDone; 1553 unsigned int wrapper_CRegsSet; 1554 Label handle_pending_exception; 1555 1556 //--------------------------------------------------------------------- 1557 // Unverified entry point (UEP) 1558 //--------------------------------------------------------------------- 1559 1560 // check ic: object class <-> cached class 1561 if (!method_is_static) { 1562 wrapper_UEPStart = __ ic_check(CodeEntryAlignment /* end_alignment */); 1563 } 1564 1565 //--------------------------------------------------------------------- 1566 // Verified entry point (VEP) 1567 //--------------------------------------------------------------------- 1568 wrapper_VEPStart = __ offset(); 1569 1570 if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) { 1571 Label L_skip_barrier; 1572 Register klass = Z_R1_scratch; 1573 // Notify OOP recorder (don't need the relocation) 1574 AddressLiteral md = __ constant_metadata_address(method->method_holder()); 1575 __ load_const_optimized(klass, md.value()); 1576 __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/); 1577 1578 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub()); 1579 __ z_br(klass); 1580 1581 __ bind(L_skip_barrier); 1582 } 1583 1584 __ save_return_pc(); 1585 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame. 1586 #ifndef USE_RESIZE_FRAME 1587 __ push_frame(frame_size_in_bytes); // Create a new frame for the wrapper. 1588 #else 1589 __ resize_frame(-frame_size_in_bytes, Z_R0_scratch); // No new frame for the wrapper. 1590 // Just resize the existing one. 1591 #endif 1592 1593 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); 1594 bs->nmethod_entry_barrier(masm); 1595 1596 wrapper_FrameDone = __ offset(); 1597 1598 // Native nmethod wrappers never take possession of the oop arguments. 1599 // So the caller will gc the arguments. 1600 // The only thing we need an oopMap for is if the call is static. 
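//
// What "handlizing" amounts to for each incoming oop argument, in rough
// C++ terms (illustrative sketch only; the real work is done by object_move()
// in the argument shuffle below):
//
//   *(oop*)(SP + handle_offset) = the_oop;                  // spill into the oop handle area
//   map->set_oop(VMRegImpl::stack2reg(handle_slot));        // let GC find and update the copy
//   c_arg = (the_oop != nullptr) ? (SP + handle_offset)     // the native code receives a jobject,
//                                : nullptr;                 // i.e. the slot's address, never the raw oop
//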
1601 // 1602 // An OopMap for lock (and class if static), and one for the VM call itself 1603 OopMapSet *oop_maps = new OopMapSet(); 1604 OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); 1605 1606 ////////////////////////////////////////////////////////////////////// 1607 // 1608 // The Grand Shuffle 1609 // 1610 ////////////////////////////////////////////////////////////////////// 1611 // 1612 // We immediately shuffle the arguments so that for any vm call we have 1613 // to make from here on out (sync slow path, jvmti, etc.) we will have 1614 // captured the oops from our caller and have a valid oopMap for them. 1615 // 1616 //-------------------------------------------------------------------- 1617 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* 1618 // (derived from JavaThread* which is in Z_thread) and, if static, 1619 // the class mirror instead of a receiver. This pretty much guarantees that 1620 // register layout will not match. We ignore these extra arguments during 1621 // the shuffle. The shuffle is described by the two calling convention 1622 // vectors we have in our possession. We simply walk the java vector to 1623 // get the source locations and the c vector to get the destinations. 1624 // 1625 // This is a trick. We double the stack slots so we can claim 1626 // the oops in the caller's frame. Since we are sure to have 1627 // more args than the caller doubling is enough to make 1628 // sure we can capture all the incoming oop args from the caller. 1629 //-------------------------------------------------------------------- 1630 1631 // Record sp-based slot for receiver on stack for non-static methods. 1632 int receiver_offset = -1; 1633 1634 //-------------------------------------------------------------------- 1635 // We move the arguments backwards because the floating point registers 1636 // destination will always be to a register with a greater or equal 1637 // register number or the stack. 1638 // jix is the index of the incoming Java arguments. 1639 // cix is the index of the outgoing C arguments. 1640 //-------------------------------------------------------------------- 1641 1642 #ifdef ASSERT 1643 bool reg_destroyed[Register::number_of_registers]; 1644 bool freg_destroyed[FloatRegister::number_of_registers]; 1645 for (int r = 0; r < Register::number_of_registers; r++) { 1646 reg_destroyed[r] = false; 1647 } 1648 for (int f = 0; f < FloatRegister::number_of_registers; f++) { 1649 freg_destroyed[f] = false; 1650 } 1651 #endif // ASSERT 1652 1653 for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) { 1654 #ifdef ASSERT 1655 if (in_regs[jix].first()->is_Register()) { 1656 assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!"); 1657 } else { 1658 if (in_regs[jix].first()->is_FloatRegister()) { 1659 assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!"); 1660 } 1661 } 1662 if (out_regs[cix].first()->is_Register()) { 1663 reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true; 1664 } else { 1665 if (out_regs[cix].first()->is_FloatRegister()) { 1666 freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true; 1667 } 1668 } 1669 #endif // ASSERT 1670 1671 switch (in_sig_bt[jix]) { 1672 // Due to casting, small integers should only occur in pairs with type T_LONG. 1673 case T_BOOLEAN: 1674 case T_CHAR: 1675 case T_BYTE: 1676 case T_SHORT: 1677 case T_INT: 1678 // Move int and do sign extension. 
1679 move32_64(masm, in_regs[jix], out_regs[cix], stack_slots); 1680 break; 1681 1682 case T_LONG : 1683 long_move(masm, in_regs[jix], out_regs[cix], stack_slots); 1684 break; 1685 1686 case T_ARRAY: 1687 case T_OBJECT: 1688 object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix], 1689 ((jix == 0) && (!method_is_static)), 1690 &receiver_offset); 1691 break; 1692 case T_VOID: 1693 break; 1694 1695 case T_FLOAT: 1696 float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset); 1697 break; 1698 1699 case T_DOUBLE: 1700 assert(jix+1 < total_in_args && in_sig_bt[jix+1] == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list"); 1701 double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset); 1702 break; 1703 1704 case T_ADDRESS: 1705 assert(false, "found T_ADDRESS in java args"); 1706 break; 1707 1708 default: 1709 ShouldNotReachHere(); 1710 } 1711 } 1712 1713 //-------------------------------------------------------------------- 1714 // Pre-load a static method's oop into ARG2. 1715 // Used both by locking code and the normal JNI call code. 1716 //-------------------------------------------------------------------- 1717 if (method_is_static) { 1718 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2); 1719 1720 // Now handlize the static class mirror in ARG2. It's known not-null. 1721 __ z_stg(Z_ARG2, klass_offset, Z_SP); 1722 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); 1723 __ add2reg(Z_ARG2, klass_offset, Z_SP); 1724 } 1725 1726 // Get JNIEnv* which is first argument to native. 1727 __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread); 1728 1729 ////////////////////////////////////////////////////////////////////// 1730 // We have all of the arguments setup at this point. 1731 // We MUST NOT touch any outgoing regs from this point on. 1732 // So if we must call out we must push a new frame. 1733 ////////////////////////////////////////////////////////////////////// 1734 1735 1736 // Calc the current pc into Z_R10 and into wrapper_CRegsSet. 1737 // Both values represent the same position. 1738 __ get_PC(Z_R10); // PC into register 1739 wrapper_CRegsSet = __ offset(); // and into into variable. 1740 1741 // Z_R10 now has the pc loaded that we will use when we finally call to native. 1742 1743 // We use the same pc/oopMap repeatedly when we call out. 1744 oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map); 1745 1746 // Lock a synchronized method. 1747 1748 if (method->is_synchronized()) { 1749 1750 // ATTENTION: args and Z_R10 must be preserved. 1751 Register r_oop = Z_R11; 1752 Register r_box = Z_R12; 1753 Register r_tmp1 = Z_R13; 1754 Register r_tmp2 = Z_R7; 1755 Label done; 1756 1757 // Load the oop for the object or class. R_carg2_classorobject contains 1758 // either the handlized oop from the incoming arguments or the handlized 1759 // class mirror (if the method is static). 1760 __ z_lg(r_oop, 0, Z_ARG2); 1761 1762 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size); 1763 // Get the lock box slot's address. 1764 __ add2reg(r_box, lock_offset, Z_SP); 1765 1766 // Try fastpath for locking. 1767 if (LockingMode == LM_LIGHTWEIGHT) { 1768 // Fast_lock kills r_temp_1, r_temp_2. 1769 __ compiler_fast_lock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2); 1770 } else { 1771 // Fast_lock kills r_temp_1, r_temp_2. 
1772 __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2); 1773 } 1774 __ z_bre(done); 1775 1776 //------------------------------------------------------------------------- 1777 // None of the above fast optimizations worked so we have to get into the 1778 // slow case of monitor enter. Inline a special case of call_VM that 1779 // disallows any pending_exception. 1780 //------------------------------------------------------------------------- 1781 1782 Register oldSP = Z_R11; 1783 1784 __ z_lgr(oldSP, Z_SP); 1785 1786 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); 1787 1788 // Prepare arguments for call. 1789 __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object. 1790 __ add2reg(Z_ARG2, lock_offset, oldSP); 1791 __ z_lgr(Z_ARG3, Z_thread); 1792 1793 __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */); 1794 1795 // Do the call. 1796 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C)); 1797 __ call(Z_R1_scratch); 1798 1799 __ reset_last_Java_frame(); 1800 1801 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers); 1802 #ifdef ASSERT 1803 { Label L; 1804 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); 1805 __ z_bre(L); 1806 __ stop("no pending exception allowed on exit from IR::monitorenter"); 1807 __ bind(L); 1808 } 1809 #endif 1810 __ bind(done); 1811 } // lock for synchronized methods 1812 1813 1814 ////////////////////////////////////////////////////////////////////// 1815 // Finally just about ready to make the JNI call. 1816 ////////////////////////////////////////////////////////////////////// 1817 1818 // Use that pc we placed in Z_R10 a while back as the current frame anchor. 1819 __ set_last_Java_frame(Z_SP, Z_R10); 1820 1821 // Transition from _thread_in_Java to _thread_in_native. 1822 __ set_thread_state(_thread_in_native); 1823 1824 ////////////////////////////////////////////////////////////////////// 1825 // This is the JNI call. 1826 ////////////////////////////////////////////////////////////////////// 1827 1828 __ call_c(native_func); 1829 1830 1831 ////////////////////////////////////////////////////////////////////// 1832 // We have survived the call once we reach here. 1833 ////////////////////////////////////////////////////////////////////// 1834 1835 1836 //-------------------------------------------------------------------- 1837 // Unpack native results. 1838 //-------------------------------------------------------------------- 1839 // For int-types, we do any needed sign-extension required. 1840 // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2 1841 // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for 1842 // blocking or unlocking. 1843 // An OOP result (handle) is done specially in the slow-path code. 1844 //-------------------------------------------------------------------- 1845 switch (ret_type) { 1846 case T_VOID: break; // Nothing to do! 1847 case T_FLOAT: break; // Got it where we want it (unless slow-path) 1848 case T_DOUBLE: break; // Got it where we want it (unless slow-path) 1849 case T_LONG: break; // Got it where we want it (unless slow-path) 1850 case T_OBJECT: break; // Really a handle. 1851 // Cannot de-handlize until after reclaiming jvm_lock. 1852 case T_ARRAY: break; 1853 1854 case T_BOOLEAN: // 0 -> false(0); !0 -> true(1) 1855 __ z_lngfr(Z_RET, Z_RET); // Force sign bit on except for zero. 
1856 __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos. 1857 break; 1858 case T_BYTE: __ z_lgbr(Z_RET, Z_RET); break; // sign extension 1859 case T_CHAR: __ z_llghr(Z_RET, Z_RET); break; // unsigned result 1860 case T_SHORT: __ z_lghr(Z_RET, Z_RET); break; // sign extension 1861 case T_INT: __ z_lgfr(Z_RET, Z_RET); break; // sign-extend for beauty. 1862 1863 default: 1864 ShouldNotReachHere(); 1865 break; 1866 } 1867 1868 // Switch thread to "native transition" state before reading the synchronization state. 1869 // This additional state is necessary because reading and testing the synchronization 1870 // state is not atomic w.r.t. GC, as this scenario demonstrates: 1871 // - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. 1872 // - VM thread changes sync state to synchronizing and suspends threads for GC. 1873 // - Thread A is resumed to finish this native method, but doesn't block here since it 1874 // didn't see any synchronization in progress, and escapes. 1875 1876 // Transition from _thread_in_native to _thread_in_native_trans. 1877 __ set_thread_state(_thread_in_native_trans); 1878 1879 // Safepoint synchronization 1880 //-------------------------------------------------------------------- 1881 // Must we block? 1882 //-------------------------------------------------------------------- 1883 // Block, if necessary, before resuming in _thread_in_Java state. 1884 // In order for GC to work, don't clear the last_Java_sp until after blocking. 1885 //-------------------------------------------------------------------- 1886 { 1887 Label no_block, sync; 1888 1889 save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg. 1890 1891 // Force this write out before the read below. 1892 if (!UseSystemMemoryBarrier) { 1893 __ z_fence(); 1894 } 1895 1896 __ safepoint_poll(sync, Z_R1); 1897 1898 __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset())); 1899 __ z_bre(no_block); 1900 1901 // Block. Save any potential method result value before the operation and 1902 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this 1903 // lets us share the oopMap we used when we went native rather than create 1904 // a distinct one for this pc. 1905 // 1906 __ bind(sync); 1907 __ z_acquire(); 1908 1909 address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans); 1910 1911 __ call_VM_leaf(entry_point, Z_thread); 1912 1913 __ bind(no_block); 1914 restore_native_result(masm, ret_type, workspace_slot_offset); 1915 } 1916 1917 //-------------------------------------------------------------------- 1918 // Thread state is thread_in_native_trans. Any safepoint blocking has 1919 // already happened so we can now change state to _thread_in_Java. 1920 //-------------------------------------------------------------------- 1921 // Transition from _thread_in_native_trans to _thread_in_Java. 1922 __ set_thread_state(_thread_in_Java); 1923 1924 //-------------------------------------------------------------------- 1925 // Reguard any pages if necessary. 1926 // Protect native result from being destroyed. 
1927 //-------------------------------------------------------------------- 1928 1929 Label no_reguard; 1930 1931 __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)), 1932 StackOverflow::stack_guard_yellow_reserved_disabled); 1933 1934 __ z_bre(no_reguard); 1935 1936 save_native_result(masm, ret_type, workspace_slot_offset); 1937 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method); 1938 restore_native_result(masm, ret_type, workspace_slot_offset); 1939 1940 __ bind(no_reguard); 1941 1942 1943 // Synchronized methods (slow path only) 1944 // No pending exceptions for now. 1945 //-------------------------------------------------------------------- 1946 // Handle possibly pending exception (will unlock if necessary). 1947 // Native result is, if any is live, in Z_FRES or Z_RES. 1948 //-------------------------------------------------------------------- 1949 // Unlock 1950 //-------------------------------------------------------------------- 1951 if (method->is_synchronized()) { 1952 const Register r_oop = Z_R11; 1953 const Register r_box = Z_R12; 1954 const Register r_tmp1 = Z_R13; 1955 const Register r_tmp2 = Z_R7; 1956 Label done; 1957 1958 // Get unboxed oop of class mirror or object ... 1959 int offset = method_is_static ? klass_offset : receiver_offset; 1960 1961 assert(offset != -1, ""); 1962 __ z_lg(r_oop, offset, Z_SP); 1963 1964 // ... and address of lock object box. 1965 __ add2reg(r_box, lock_offset, Z_SP); 1966 1967 // Try fastpath for unlocking. 1968 if (LockingMode == LM_LIGHTWEIGHT) { 1969 // Fast_unlock kills r_tmp1, r_tmp2. 1970 __ compiler_fast_unlock_lightweight_object(r_oop, r_box, r_tmp1, r_tmp2); 1971 } else { 1972 // Fast_unlock kills r_tmp1, r_tmp2. 1973 __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); 1974 } 1975 __ z_bre(done); 1976 1977 // Slow path for unlocking. 1978 // Save and restore any potential method result value around the unlocking operation. 1979 const Register R_exc = Z_R11; 1980 1981 save_native_result(masm, ret_type, workspace_slot_offset); 1982 1983 // Must save pending exception around the slow-path VM call. Since it's a 1984 // leaf call, the pending exception (if any) can be kept in a register. 1985 __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset())); 1986 assert(R_exc->is_nonvolatile(), "exception register must be non-volatile"); 1987 1988 // Must clear pending-exception before re-entering the VM. Since this is 1989 // a leaf call, pending-exception-oop can be safely kept in a register. 1990 __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t)); 1991 1992 // Inline a special case of call_VM that disallows any pending_exception. 1993 1994 // Get locked oop from the handle we passed to jni. 1995 __ z_lg(Z_ARG1, offset, Z_SP); 1996 __ add2reg(Z_ARG2, lock_offset, Z_SP); 1997 __ z_lgr(Z_ARG3, Z_thread); 1998 1999 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)); 2000 2001 __ call(Z_R1_scratch); 2002 2003 #ifdef ASSERT 2004 { 2005 Label L; 2006 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); 2007 __ z_bre(L); 2008 __ stop("no pending exception allowed on exit from IR::monitorexit"); 2009 __ bind(L); 2010 } 2011 #endif 2012 2013 // Check_forward_pending_exception jump to forward_exception if any pending 2014 // exception is set. 
The forward_exception routine expects to see the 2015 // exception in pending_exception and not in a register. Kind of clumsy, 2016 // since all folks who branch to forward_exception must have tested 2017 // pending_exception first and hence have it in a register already. 2018 __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset())); 2019 restore_native_result(masm, ret_type, workspace_slot_offset); 2020 __ z_bru(done); 2021 __ z_illtrap(0x66); 2022 2023 __ bind(done); 2024 } 2025 2026 2027 //-------------------------------------------------------------------- 2028 // Clear "last Java frame" SP and PC. 2029 //-------------------------------------------------------------------- 2030 2031 __ reset_last_Java_frame(); 2032 2033 // Unpack oop result, e.g. JNIHandles::resolve result. 2034 if (is_reference_type(ret_type)) { 2035 __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7); 2036 } 2037 2038 if (CheckJNICalls) { 2039 // clear_pending_jni_exception_check 2040 __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop)); 2041 } 2042 2043 // Reset handle block. 2044 __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset())); 2045 __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset()), 4); 2046 2047 // Check for pending exceptions. 2048 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset())); 2049 __ z_brne(handle_pending_exception); 2050 2051 2052 ////////////////////////////////////////////////////////////////////// 2053 // Return 2054 ////////////////////////////////////////////////////////////////////// 2055 2056 2057 #ifndef USE_RESIZE_FRAME 2058 __ pop_frame(); // Pop wrapper frame. 2059 #else 2060 __ resize_frame(frame_size_in_bytes, Z_R0_scratch); // Revert stack extension. 2061 #endif 2062 __ restore_return_pc(); // This is the way back to the caller. 2063 __ z_br(Z_R14); 2064 2065 2066 ////////////////////////////////////////////////////////////////////// 2067 // Out-of-line calls to the runtime. 2068 ////////////////////////////////////////////////////////////////////// 2069 2070 2071 //--------------------------------------------------------------------- 2072 // Handler for pending exceptions (out-of-line). 2073 //--------------------------------------------------------------------- 2074 // Since this is a native call, we know the proper exception handler 2075 // is the empty function. We just pop this frame and then jump to 2076 // forward_exception_entry. Z_R14 will contain the native caller's 2077 // return PC. 2078 __ bind(handle_pending_exception); 2079 __ pop_frame(); 2080 __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry()); 2081 __ restore_return_pc(); 2082 __ z_br(Z_R1_scratch); 2083 2084 __ flush(); 2085 ////////////////////////////////////////////////////////////////////// 2086 // end of code generation 2087 ////////////////////////////////////////////////////////////////////// 2088 2089 2090 nmethod *nm = nmethod::new_native_nmethod(method, 2091 compile_id, 2092 masm->code(), 2093 (int)(wrapper_VEPStart-wrapper_CodeStart), 2094 (int)(wrapper_FrameDone-wrapper_CodeStart), 2095 stack_slots / VMRegImpl::slots_per_word, 2096 (method_is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), 2097 in_ByteSize(lock_offset), 2098 oop_maps); 2099 2100 return nm; 2101 } 2102 2103 static address gen_c2i_adapter(MacroAssembler *masm, 2104 int total_args_passed, 2105 int comp_args_on_stack, 2106 const BasicType *sig_bt, 2107 const VMRegPair *regs, 2108 Label &skip_fixup) { 2109 // Before we get into the guts of the C2I adapter, see if we should be here 2110 // at all. We've come from compiled code and are attempting to jump to the 2111 // interpreter, which means the caller made a static call to get here 2112 // (vcalls always get a compiled target if there is one). Check for a 2113 // compiled target. If there is one, we need to patch the caller's call. 2114 2115 // These two defs MUST MATCH code in gen_i2c2i_adapter! 2116 const Register ientry = Z_R11; 2117 const Register code = Z_R11; 2118 2119 address c2i_entrypoint; 2120 Label patch_callsite; 2121 2122 // Regular (verified) c2i entry point. 2123 c2i_entrypoint = __ pc(); 2124 2125 // Call patching needed? 2126 __ load_and_test_long(Z_R0_scratch, method_(code)); 2127 __ z_lg(ientry, method_(interpreter_entry)); // Preload interpreter entry (also if patching). 2128 __ z_brne(patch_callsite); // Patch required if code isn't null (compiled target exists). 2129 2130 __ bind(skip_fixup); // Return point from patch_callsite. 2131 2132 // Since all args are passed on the stack, total_args_passed*wordSize is the 2133 // space we need. We need ABI scratch area but we use the caller's since 2134 // it has already been allocated. 2135 2136 const int abi_scratch = frame::z_top_ijava_frame_abi_size; 2137 int extraspace = align_up(total_args_passed, 2)*wordSize + abi_scratch; 2138 Register sender_SP = Z_R10; 2139 Register value = Z_R12; 2140 2141 // Remember the senderSP so we can pop the interpreter arguments off of the stack. 2142 // In addition, frame manager expects initial_caller_sp in Z_R10. 2143 __ z_lgr(sender_SP, Z_SP); 2144 2145 // This should always fit in 14 bit immediate. 2146 __ resize_frame(-extraspace, Z_R0_scratch); 2147 2148 // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial 2149 // args. This essentially moves the callers ABI scratch area from the top to the 2150 // bottom of the arg area. 2151 2152 int st_off = extraspace - wordSize; 2153 2154 // Now write the args into the outgoing interpreter space. 2155 for (int i = 0; i < total_args_passed; i++) { 2156 VMReg r_1 = regs[i].first(); 2157 VMReg r_2 = regs[i].second(); 2158 if (!r_1->is_valid()) { 2159 assert(!r_2->is_valid(), ""); 2160 continue; 2161 } 2162 if (r_1->is_stack()) { 2163 // The calling convention produces OptoRegs that ignore the preserve area (abi scratch). 2164 // We must account for it here. 2165 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 2166 2167 if (!r_2->is_valid()) { 2168 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*)); 2169 } else { 2170 // longs are given 2 64-bit slots in the interpreter, 2171 // but the data is passed in only 1 slot. 
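//
// Layout of such a two-slot argument in the outgoing interpreter area
// (sketch; offsets relative to Z_SP as used below):
//
//   [Z_SP + st_off           ]  <- unused half, filled with known junk in debug builds
//   [Z_SP + st_off - wordSize]  <- the actual 64-bit value (only this slot carries data)
//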
2172 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 2173 #ifdef ASSERT 2174 __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); 2175 #endif 2176 st_off -= wordSize; 2177 } 2178 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*)); 2179 } 2180 } else { 2181 if (r_1->is_Register()) { 2182 if (!r_2->is_valid()) { 2183 __ z_st(r_1->as_Register(), st_off, Z_SP); 2184 } else { 2185 // longs are given 2 64-bit slots in the interpreter, but the 2186 // data is passed in only 1 slot. 2187 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 2188 #ifdef ASSERT 2189 __ clear_mem(Address(Z_SP, st_off), sizeof(void *)); 2190 #endif 2191 st_off -= wordSize; 2192 } 2193 __ z_stg(r_1->as_Register(), st_off, Z_SP); 2194 } 2195 } else { 2196 assert(r_1->is_FloatRegister(), ""); 2197 if (!r_2->is_valid()) { 2198 __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP); 2199 } else { 2200 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the 2201 // data is passed in only 1 slot. 2202 // One of these should get known junk... 2203 #ifdef ASSERT 2204 __ z_lzdr(Z_F1); 2205 __ z_std(Z_F1, st_off, Z_SP); 2206 #endif 2207 st_off-=wordSize; 2208 __ z_std(r_1->as_FloatRegister(), st_off, Z_SP); 2209 } 2210 } 2211 } 2212 st_off -= wordSize; 2213 } 2214 2215 2216 // Jump to the interpreter just as if interpreter was doing it. 2217 __ add2reg(Z_esp, st_off, Z_SP); 2218 2219 // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10. 2220 __ z_br(ientry); 2221 2222 2223 // Prevent illegal entry to out-of-line code. 2224 __ z_illtrap(0x22); 2225 2226 // Generate out-of-line runtime call to patch caller, 2227 // then continue as interpreted. 2228 2229 // IF you lose the race you go interpreted. 2230 // We don't see any possible endless c2i -> i2c -> c2i ... 2231 // transitions no matter how rare. 2232 __ bind(patch_callsite); 2233 2234 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers); 2235 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14); 2236 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers); 2237 __ z_bru(skip_fixup); 2238 2239 // end of out-of-line code 2240 2241 return c2i_entrypoint; 2242 } 2243 2244 // On entry, the following registers are set 2245 // 2246 // Z_thread r8 - JavaThread* 2247 // Z_method r9 - callee's method (method to be invoked) 2248 // Z_esp r7 - operand (or expression) stack pointer of caller. one slot above last arg. 2249 // Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top 2250 // 2251 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, 2252 int total_args_passed, 2253 int comp_args_on_stack, 2254 const BasicType *sig_bt, 2255 const VMRegPair *regs) { 2256 const Register value = Z_R12; 2257 const Register ld_ptr= Z_esp; 2258 2259 int ld_offset = total_args_passed * wordSize; 2260 2261 // Cut-out for having no stack args. 2262 if (comp_args_on_stack) { 2263 // Sig words on the stack are greater than VMRegImpl::stack0. Those in 2264 // registers are below. By subtracting stack0, we either get a negative 2265 // number (all values in registers) or the maximum stack slot accessed. 2266 // Convert VMRegImpl (4 byte) stack slots to words. 
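//
// A quick numeric sketch of the conversion below (values are an example
// only): with comp_args_on_stack == 3 and the usual 64-bit constants
// (stack_slot_size == 4, wordSize == 8, LogBytesPerWord == 3):
//
//   align_up(3 * 4, 8) >> 3   ==  16 >> 3  ==  2 words
//   align_up(2, 2)            ==  2 words  ==  16 bytes of frame extension
//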
2267 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; 2268 // Round up to miminum stack alignment, in wordSize 2269 comp_words_on_stack = align_up(comp_words_on_stack, 2); 2270 2271 __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch); 2272 } 2273 2274 // Now generate the shuffle code. Pick up all register args and move the 2275 // rest through register value=Z_R12. 2276 for (int i = 0; i < total_args_passed; i++) { 2277 if (sig_bt[i] == T_VOID) { 2278 assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); 2279 continue; 2280 } 2281 2282 // Pick up 0, 1 or 2 words from ld_ptr. 2283 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), 2284 "scrambled load targets?"); 2285 VMReg r_1 = regs[i].first(); 2286 VMReg r_2 = regs[i].second(); 2287 if (!r_1->is_valid()) { 2288 assert(!r_2->is_valid(), ""); 2289 continue; 2290 } 2291 if (r_1->is_FloatRegister()) { 2292 if (!r_2->is_valid()) { 2293 __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr); 2294 ld_offset-=wordSize; 2295 } else { 2296 // Skip the unused interpreter slot. 2297 __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr); 2298 ld_offset -= 2 * wordSize; 2299 } 2300 } else { 2301 if (r_1->is_stack()) { 2302 // Must do a memory to memory move. 2303 int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; 2304 2305 if (!r_2->is_valid()) { 2306 __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*)); 2307 } else { 2308 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the 2309 // data is passed in only 1 slot. 2310 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 2311 ld_offset -= wordSize; 2312 } 2313 __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*)); 2314 } 2315 } else { 2316 if (!r_2->is_valid()) { 2317 // Not sure we need to do this but it shouldn't hurt. 2318 if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) { 2319 __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); 2320 } else { 2321 __ z_l(r_1->as_Register(), ld_offset, ld_ptr); 2322 } 2323 } else { 2324 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the 2325 // data is passed in only 1 slot. 2326 if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { 2327 ld_offset -= wordSize; 2328 } 2329 __ z_lg(r_1->as_Register(), ld_offset, ld_ptr); 2330 } 2331 } 2332 ld_offset -= wordSize; 2333 } 2334 } 2335 2336 // Jump to the compiled code just as if compiled code was doing it. 2337 // load target address from method: 2338 __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset())); 2339 2340 // Store method into thread->callee_target. 2341 // 6243940: We might end up in handle_wrong_method if 2342 // the callee is deoptimized as we race thru here. If that 2343 // happens we don't want to take a safepoint because the 2344 // caller frame will look interpreted and arguments are now 2345 // "compiled" so it is much better to make this transition 2346 // invisible to the stack walking code. Unfortunately, if 2347 // we try and find the callee by normal means a safepoint 2348 // is possible. So we stash the desired callee in the thread 2349 // and the vm will find it there should this case occur. 
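//
// In plain C++ terms the store below is simply (sketch):
//
//   current_thread->set_callee_target(callee_method);  // picked up again by
//                                                      // SharedRuntime::handle_wrong_method()
//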
2350 __ z_stg(Z_method, thread_(callee_target)); 2351 2352 __ z_br(Z_R1_scratch); 2353 } 2354 2355 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, 2356 int total_args_passed, 2357 int comp_args_on_stack, 2358 const BasicType *sig_bt, 2359 const VMRegPair *regs, 2360 AdapterHandlerEntry* handler) { 2361 __ align(CodeEntryAlignment); 2362 address i2c_entry = __ pc(); 2363 gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); 2364 2365 address c2i_unverified_entry; 2366 2367 Label skip_fixup; 2368 { 2369 Label ic_miss; 2370 2371 // Out-of-line call to ic_miss handler. 2372 __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch); 2373 2374 // Unverified Entry Point UEP 2375 __ align(CodeEntryAlignment); 2376 c2i_unverified_entry = __ pc(); 2377 2378 __ ic_check(2); 2379 __ z_lg(Z_method, Address(Z_inline_cache, CompiledICData::speculated_method_offset())); 2380 // This def MUST MATCH code in gen_c2i_adapter! 2381 const Register code = Z_R11; 2382 2383 __ load_and_test_long(Z_R0, method_(code)); 2384 __ z_brne(ic_miss); // Cache miss: call runtime to handle this. 2385 2386 // Fallthru to VEP. Duplicate LTG, but saved taken branch. 2387 } 2388 2389 address c2i_entry = __ pc(); 2390 2391 // Class initialization barrier for static methods 2392 address c2i_no_clinit_check_entry = nullptr; 2393 if (VM_Version::supports_fast_class_init_checks()) { 2394 Label L_skip_barrier; 2395 2396 { // Bypass the barrier for non-static methods 2397 __ testbit_ushort(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT); 2398 __ z_bfalse(L_skip_barrier); // non-static 2399 } 2400 2401 Register klass = Z_R11; 2402 __ load_method_holder(klass, Z_method); 2403 __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/); 2404 2405 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub()); 2406 __ z_br(klass); 2407 2408 __ bind(L_skip_barrier); 2409 c2i_no_clinit_check_entry = __ pc(); 2410 } 2411 2412 gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); 2413 2414 handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry); 2415 return; 2416 } 2417 2418 // This function returns the adjust size (in number of words) to a c2i adapter 2419 // activation for use during deoptimization. 2420 // 2421 // Actually only compiled frames need to be adjusted, but it 2422 // doesn't harm to adjust entry and interpreter frames, too. 2423 // 2424 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { 2425 assert(callee_locals >= callee_parameters, 2426 "test and remove; got more parms than locals"); 2427 // Handle the abi adjustment here instead of doing it in push_skeleton_frames. 2428 return (callee_locals - callee_parameters) * Interpreter::stackElementWords + 2429 frame::z_parent_ijava_frame_abi_size / BytesPerWord; 2430 } 2431 2432 uint SharedRuntime::in_preserve_stack_slots() { 2433 return frame::jit_in_preserve_size_in_4_byte_units; 2434 } 2435 2436 uint SharedRuntime::out_preserve_stack_slots() { 2437 return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size; 2438 } 2439 2440 VMReg SharedRuntime::thread_register() { 2441 Unimplemented(); 2442 return nullptr; 2443 } 2444 2445 // 2446 // Frame generation for deopt and uncommon trap blobs. 
2447 // 2448 static void push_skeleton_frame(MacroAssembler* masm, 2449 /* Unchanged */ 2450 Register frame_sizes_reg, 2451 Register pcs_reg, 2452 /* Invalidate */ 2453 Register frame_size_reg, 2454 Register pc_reg) { 2455 BLOCK_COMMENT(" push_skeleton_frame {"); 2456 __ z_lg(pc_reg, 0, pcs_reg); 2457 __ z_lg(frame_size_reg, 0, frame_sizes_reg); 2458 __ z_stg(pc_reg, _z_abi(return_pc), Z_SP); 2459 Register fp = pc_reg; 2460 __ push_frame(frame_size_reg, fp); 2461 #ifdef ASSERT 2462 // The magic is required for successful walking skeletal frames. 2463 __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number); 2464 __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp); 2465 // Fill other slots that are supposedly not necessary with eye catchers. 2466 __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1); 2467 __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp); 2468 // The sender_sp of the bottom frame is set before pushing it. 2469 // The sender_sp of non bottom frames is their caller's top_frame_sp, which 2470 // is unknown here. Luckily it is not needed before filling the frame in 2471 // layout_activation(), we assert this by setting an eye catcher (see 2472 // comments on sender_sp in frame_s390.hpp). 2473 __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP); 2474 #endif // ASSERT 2475 BLOCK_COMMENT(" } push_skeleton_frame"); 2476 } 2477 2478 // Loop through the UnrollBlock info and create new frames. 2479 static void push_skeleton_frames(MacroAssembler* masm, bool deopt, 2480 /* read */ 2481 Register unroll_block_reg, 2482 /* invalidate */ 2483 Register frame_sizes_reg, 2484 Register number_of_frames_reg, 2485 Register pcs_reg, 2486 Register tmp1, 2487 Register tmp2) { 2488 BLOCK_COMMENT("push_skeleton_frames {"); 2489 // _number_of_frames is of type int (deoptimization.hpp). 2490 __ z_lgf(number_of_frames_reg, 2491 Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset())); 2492 __ z_lg(pcs_reg, 2493 Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset())); 2494 __ z_lg(frame_sizes_reg, 2495 Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset())); 2496 2497 // stack: (caller_of_deoptee, ...). 2498 2499 // If caller_of_deoptee is a compiled frame, then we extend it to make 2500 // room for the callee's locals and the frame::z_parent_ijava_frame_abi. 2501 // See also Deoptimization::last_frame_adjust() above. 2502 // Note: entry and interpreted frames are adjusted, too. But this doesn't harm. 2503 2504 __ z_lgf(Z_R1_scratch, 2505 Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset())); 2506 __ z_lgr(tmp1, Z_SP); // Save the sender sp before extending the frame. 2507 __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/); 2508 // The oldest skeletal frame requires a valid sender_sp to make it walkable 2509 // (it is required to find the original pc of caller_of_deoptee if it is marked 2510 // for deoptimization - see nmethod::orig_pc_addr()). 2511 __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP); 2512 2513 // Now push the new interpreter frames. 2514 Label loop, loop_entry; 2515 2516 // Make sure that there is at least one entry in the array. 
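//
// What the emitted loop below does, written as ordinary C code for
// illustration (the arrays come from the UnrollBlock):
//
//   for (int i = 0; i < number_of_frames; i++) {
//     // store frame_pcs[i] as return pc of the frame currently on top,
//     // then push a skeletal frame of frame_sizes[i] bytes
//     push_skeleton_frame(frame_sizes[i], frame_pcs[i]);
//   }
//   // finally: return pc of the topmost skeletal frame = frame_pcs[number_of_frames]
//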
2517 DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg)); 2518 __ asm_assert(Assembler::bcondNotZero, "array_size must be > 0", 0x205); 2519 2520 __ z_bru(loop_entry); 2521 2522 __ bind(loop); 2523 2524 __ add2reg(frame_sizes_reg, wordSize); 2525 __ add2reg(pcs_reg, wordSize); 2526 2527 __ bind(loop_entry); 2528 2529 // Allocate a new frame, fill in the pc. 2530 push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2); 2531 2532 __ z_aghi(number_of_frames_reg, -1); // Emit AGHI, because it sets the condition code 2533 __ z_brne(loop); 2534 2535 // Set the top frame's return pc. 2536 __ add2reg(pcs_reg, wordSize); 2537 __ z_lg(Z_R0_scratch, 0, pcs_reg); 2538 __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP); 2539 BLOCK_COMMENT("} push_skeleton_frames"); 2540 } 2541 2542 //------------------------------generate_deopt_blob---------------------------- 2543 void SharedRuntime::generate_deopt_blob() { 2544 // Allocate space for the code. 2545 ResourceMark rm; 2546 // Setup code generation tools. 2547 const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id); 2548 CodeBuffer buffer(name, 2048, 1024); 2549 InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); 2550 Label exec_mode_initialized; 2551 OopMap* map = nullptr; 2552 OopMapSet *oop_maps = new OopMapSet(); 2553 2554 unsigned int start_off = __ offset(); 2555 Label cont; 2556 2557 // -------------------------------------------------------------------------- 2558 // Normal entry (non-exception case) 2559 // 2560 // We have been called from the deopt handler of the deoptee. 2561 // Z_R14 points behind the call in the deopt handler. We adjust 2562 // it such that it points to the start of the deopt handler. 2563 // The return_pc has been stored in the frame of the deoptee and 2564 // will replace the address of the deopt_handler in the call 2565 // to Deoptimization::fetch_unroll_info below. 2566 // The (int) cast is necessary, because -((unsigned int)14) 2567 // is an unsigned int. 2568 __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size()); 2569 2570 const Register exec_mode_reg = Z_tmp_1; 2571 2572 // stack: (deoptee, caller of deoptee, ...) 2573 2574 // pushes an "unpack" frame 2575 // R14 contains the return address pointing into the deoptimized 2576 // nmethod that was valid just before the nmethod was deoptimized. 2577 // save R14 into the deoptee frame. the `fetch_unroll_info' 2578 // procedure called below will read it from there. 2579 map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); 2580 2581 // note the entry point. 2582 __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt); 2583 __ z_bru(exec_mode_initialized); 2584 2585 #ifndef COMPILER1 2586 int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap 2587 #else 2588 // -------------------------------------------------------------------------- 2589 // Reexecute entry 2590 // - Z_R14 = Deopt Handler in nmethod 2591 2592 int reexecute_offset = __ offset() - start_off; 2593 2594 // No need to update map as each call to save_live_registers will produce identical oopmap 2595 (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers); 2596 2597 __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute); 2598 __ z_bru(exec_mode_initialized); 2599 #endif 2600 2601 2602 // -------------------------------------------------------------------------- 2603 // Exception entry. We reached here via a branch. 
Registers on entry: 2604 // - Z_EXC_OOP (Z_ARG1) = exception oop 2605 // - Z_EXC_PC (Z_ARG2) = the exception pc. 2606 2607 int exception_offset = __ offset() - start_off; 2608 2609 // all registers are dead at this entry point, except for Z_EXC_OOP, and 2610 // Z_EXC_PC which contain the exception oop and exception pc 2611 // respectively. Set them in TLS and fall thru to the 2612 // unpack_with_exception_in_tls entry point. 2613 2614 // Store exception oop and pc in thread (location known to GC). 2615 // Need this since the call to "fetch_unroll_info()" may safepoint. 2616 __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset())); 2617 __ z_stg(Z_EXC_PC, Address(Z_thread, JavaThread::exception_pc_offset())); 2618 2619 // fall through 2620 2621 int exception_in_tls_offset = __ offset() - start_off; 2622 2623 // new implementation because exception oop is now passed in JavaThread 2624 2625 // Prolog for exception case 2626 // All registers must be preserved because they might be used by LinearScan 2627 // Exception oop and throwing PC are passed in JavaThread 2628 2629 // Load the throwing pc from JavaThread and use it as the return address of the current frame. 2630 __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset())); 2631 2632 // Save everything in sight. 2633 (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch); 2634 2635 // Now it is safe to overwrite any register 2636 2637 // Clear the exception pc field in JavaThread 2638 __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8); 2639 2640 // Deopt during an exception. Save exec mode for unpack_frames. 2641 __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception); 2642 2643 2644 #ifdef ASSERT 2645 // verify that there is really an exception oop in JavaThread 2646 __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset())); 2647 __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE); 2648 2649 // verify that there is no pending exception 2650 __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread, 2651 "must not have pending exception here", __LINE__); 2652 #endif 2653 2654 // -------------------------------------------------------------------------- 2655 // At this point, the live registers are saved and 2656 // the exec_mode_reg has been set up correctly. 2657 __ bind(exec_mode_initialized); 2658 2659 // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...). 2660 2661 const Register unroll_block_reg = Z_tmp_2; 2662 2663 // We need to set `last_Java_frame' because `fetch_unroll_info' will 2664 // call `last_Java_frame()'. However, we can't block and no gc will 2665 // occur, so we don't need an oopmap. The value of the pc in the 2666 // frame is not particularly important. It just needs to identify the blob. 2667 2668 // Don't set last_Java_pc anymore here (is implicitly null then). 2669 // The correct PC is retrieved in pd_last_frame() in that case. 2670 __ set_last_Java_frame(/*sp*/Z_SP, noreg); 2671 // With EscapeAnalysis turned on, this call may safepoint, 2672 // despite being marked as a "leaf call"! 2673 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg); 2674 // Set an oopmap for the call site. It describes all our saved volatile registers. 2675 int oop_map_offs = __ offset(); 2676 oop_maps->add_gc_map(oop_map_offs, map); 2677 2678 __ reset_last_Java_frame(); 2679 // save the return value.
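//
// The "return value" is the UnrollBlock built by fetch_unroll_info().
// Roughly, the fields this blob consumes (see deoptimization.hpp):
//
//   info->number_of_frames();   // how many skeletal interpreter frames to push
//   info->frame_sizes();        // their sizes
//   info->frame_pcs();          // the pcs to plant as return addresses
//   info->caller_adjustment();  // extension of the caller-of-deoptee frame
//   info->unpack_kind();        // Unpack_deopt / Unpack_exception / Unpack_reexecute
//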
2680 __ z_lgr(unroll_block_reg, Z_RET); 2681 // restore the return registers that have been saved 2682 // (among other registers) by save_live_registers(...). 2683 RegisterSaver::restore_result_registers(masm); 2684 2685 // reload the exec mode from the UnrollBlock (it might have changed) 2686 __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset())); 2687 2688 // In excp_deopt_mode, restore and clear exception oop which we 2689 // stored in the thread during exception entry above. The exception 2690 // oop will be the return value of this stub. 2691 NearLabel skip_restore_excp; 2692 __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp); 2693 __ z_lg(Z_RET, thread_(exception_oop)); 2694 __ clear_mem(thread_(exception_oop), 8); 2695 __ bind(skip_restore_excp); 2696 2697 // remove the "unpack" frame 2698 __ pop_frame(); 2699 2700 // stack: (deoptee, caller of deoptee, ...). 2701 2702 // pop the deoptee's frame 2703 __ pop_frame(); 2704 2705 // stack: (caller_of_deoptee, ...). 2706 2707 // loop through the `UnrollBlock' info and create interpreter frames. 2708 push_skeleton_frames(masm, true/*deopt*/, 2709 unroll_block_reg, 2710 Z_tmp_3, 2711 Z_tmp_4, 2712 Z_ARG5, 2713 Z_ARG4, 2714 Z_ARG3); 2715 2716 // stack: (skeletal interpreter frame, ..., optional skeletal 2717 // interpreter frame, caller of deoptee, ...). 2718 2719 // push an "unpack" frame taking care of float / int return values. 2720 __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)); 2721 2722 // stack: (unpack frame, skeletal interpreter frame, ..., optional 2723 // skeletal interpreter frame, caller of deoptee, ...). 2724 2725 // spill live volatile registers since we'll do a call. 2726 __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP); 2727 __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP); 2728 2729 // let the unpacker layout information in the skeletal frames just allocated. 2730 __ get_PC(Z_RET, oop_map_offs - __ offset()); 2731 __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET); 2732 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), 2733 Z_thread/*thread*/, exec_mode_reg/*exec_mode*/); 2734 2735 __ reset_last_Java_frame(); 2736 2737 // restore the volatiles saved above. 2738 __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP); 2739 __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP); 2740 2741 // pop the "unpack" frame. 2742 __ pop_frame(); 2743 __ restore_return_pc(); 2744 2745 // stack: (top interpreter frame, ..., optional interpreter frame, 2746 // caller of deoptee, ...). 2747 2748 __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer 2749 __ restore_bcp(); 2750 __ restore_locals(); 2751 __ restore_esp(); 2752 2753 // return to the interpreter entry point. 
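//
// Summarized, the path executed by this blob (C-like sketch):
//
//   info = Deoptimization::fetch_unroll_info(thread, exec_mode);
//   pop the "unpack" frame and the deoptee's frame;
//   push_skeleton_frames(info);
//   Deoptimization::unpack_frames(thread, exec_mode);   // fill in the skeletal frames
//   restore bcp/locals/esp and branch into the interpreter (below).
//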
2754 __ z_br(Z_R14); 2755 2756 // Make sure all code is generated 2757 masm->flush(); 2758 2759 _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize); 2760 _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); 2761 } 2762 2763 2764 #ifdef COMPILER2 2765 //------------------------------generate_uncommon_trap_blob-------------------- 2766 void OptoRuntime::generate_uncommon_trap_blob() { 2767 // Allocate space for the code 2768 ResourceMark rm; 2769 // Setup code generation tools 2770 CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); 2771 InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer); 2772 2773 Register unroll_block_reg = Z_tmp_1; 2774 Register klass_index_reg = Z_ARG2; 2775 Register unc_trap_reg = Z_ARG2; 2776 2777 // stack: (deoptee, caller_of_deoptee, ...). 2778 2779 // push a dummy "unpack" frame and call 2780 // `Deoptimization::uncommon_trap' to pack the compiled frame into a 2781 // vframe array and return the `UnrollBlock' information. 2782 2783 // save R14 to compiled frame. 2784 __ save_return_pc(); 2785 // push the "unpack_frame". 2786 __ push_frame_abi160(0); 2787 2788 // stack: (unpack frame, deoptee, caller_of_deoptee, ...). 2789 2790 // set the "unpack" frame as last_Java_frame. 2791 // `Deoptimization::uncommon_trap' expects it and considers its 2792 // sender frame as the deoptee frame. 2793 __ get_PC(Z_R1_scratch); 2794 __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch); 2795 2796 __ z_lgr(klass_index_reg, Z_ARG1); // passed implicitly as ARG2 2797 __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap); // passed implicitly as ARG3 2798 BLOCK_COMMENT("call Deoptimization::uncommon_trap()"); 2799 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread); 2800 2801 __ reset_last_Java_frame(); 2802 2803 // pop the "unpack" frame 2804 __ pop_frame(); 2805 2806 // stack: (deoptee, caller_of_deoptee, ...). 2807 2808 // save the return value. 2809 __ z_lgr(unroll_block_reg, Z_RET); 2810 2811 // pop the deoptee frame. 2812 __ pop_frame(); 2813 2814 // stack: (caller_of_deoptee, ...). 2815 2816 #ifdef ASSERT 2817 assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates"); 2818 assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates"); 2819 const int unpack_kind_byte_offset = in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()) 2820 #ifndef VM_LITTLE_ENDIAN 2821 + 3 2822 #endif 2823 ; 2824 if (Displacement::is_shortDisp(unpack_kind_byte_offset)) { 2825 __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap); 2826 } else { 2827 __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap); 2828 } 2829 __ asm_assert(Assembler::bcondEqual, "OptoRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0); 2830 #endif 2831 2832 __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1); 2833 2834 // allocate new interpreter frame(s) and possibly resize the caller's frame 2835 // (no more adapters !) 2836 push_skeleton_frames(masm, false/*deopt*/, 2837 unroll_block_reg, 2838 Z_tmp_2, 2839 Z_tmp_3, 2840 Z_tmp_4, 2841 Z_ARG5, 2842 Z_ARG4); 2843 2844 // stack: (skeletal interpreter frame, ..., optional skeletal 2845 // interpreter frame, (resized) caller of deoptee, ...). 
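//
// Summarized, this blob performs (C-like sketch):
//
//   info = Deoptimization::uncommon_trap(thread, klass_index, Unpack_uncommon_trap);
//   pop the deoptee's compiled frame;
//   push_skeleton_frames(info);                                   // the state reached here
//   Deoptimization::unpack_frames(thread, Unpack_uncommon_trap);  // fill in the frames (below)
//   branch into the interpreter.
//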
2846 2847 // push a dummy "unpack" frame taking care of float return values. 2848 // call `Deoptimization::unpack_frames' to layout information in the 2849 // interpreter frames just created 2850 2851 // push the "unpack" frame 2852 const unsigned int framesize_in_bytes = __ push_frame_abi160(0); 2853 2854 // stack: (unpack frame, skeletal interpreter frame, ..., optional 2855 // skeletal interpreter frame, (resized) caller of deoptee, ...). 2856 2857 // set the "unpack" frame as last_Java_frame 2858 __ get_PC(Z_R1_scratch); 2859 __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch); 2860 2861 // indicate it is the uncommon trap case 2862 BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()"); 2863 __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap); 2864 // let the unpacker layout information in the skeletal frames just allocated. 2865 __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread); 2866 2867 __ reset_last_Java_frame(); 2868 // pop the "unpack" frame 2869 __ pop_frame(); 2870 // restore LR from top interpreter frame 2871 __ restore_return_pc(); 2872 2873 // stack: (top interpreter frame, ..., optional interpreter frame, 2874 // (resized) caller of deoptee, ...). 2875 2876 __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer 2877 __ restore_bcp(); 2878 __ restore_locals(); 2879 __ restore_esp(); 2880 2881 // return to the interpreter entry point 2882 __ z_br(Z_R14); 2883 2884 masm->flush(); 2885 _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, nullptr, framesize_in_bytes/wordSize); 2886 } 2887 #endif // COMPILER2 2888 2889 2890 //------------------------------generate_handler_blob------ 2891 // 2892 // Generate a special Compile2Runtime blob that saves all registers, 2893 // and setup oopmap. 2894 SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) { 2895 assert(StubRoutines::forward_exception_entry() != nullptr, 2896 "must be generated before"); 2897 assert(is_polling_page_id(id), "expected a polling page stub id"); 2898 2899 ResourceMark rm; 2900 OopMapSet *oop_maps = new OopMapSet(); 2901 OopMap* map; 2902 2903 // Allocate space for the code. Setup code generation tools. 2904 const char* name = SharedRuntime::stub_name(id); 2905 CodeBuffer buffer(name, 2048, 1024); 2906 MacroAssembler* masm = new MacroAssembler(&buffer); 2907 2908 unsigned int start_off = __ offset(); 2909 address call_pc = nullptr; 2910 int frame_size_in_bytes; 2911 2912 bool cause_return = (id == SharedStubId::polling_page_return_handler_id); 2913 // Make room for return address (or push it again) 2914 if (!cause_return) { 2915 __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset())); 2916 } 2917 2918 bool save_vectors = (id == SharedStubId::polling_page_vectors_safepoint_handler_id); 2919 // Save registers, fpu state, and flags 2920 map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors); 2921 2922 if (!cause_return) { 2923 // Keep a copy of the return pc to detect if it gets modified. 2924 __ z_lgr(Z_R6, Z_R14); 2925 } 2926 2927 // The following is basically a call_VM. However, we need the precise 2928 // address of the call in order to generate an oopmap. Hence, we do all the 2929 // work ourselves. 2930 __ set_last_Java_frame(Z_SP, noreg); 2931 2932 // call into the runtime to handle the safepoint poll 2933 __ call_VM_leaf(call_ptr, Z_thread); 2934 2935 2936 // Set an oopmap for the call site. 
  // This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map((int)(__ offset()-start_off), map);

  Label noException;

  __ reset_last_Java_frame();

  __ load_and_test_long(Z_R1, thread_(pending_exception));
  __ z_bre(noException);

  // Pending exception case, used (sporadically) by
  // api/java_lang/Thread.State/index#ThreadState et al.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  // Jump to forward_exception_entry, with the issuing PC in Z_R14
  // so it looks like the original nmethod called forward_exception_entry.
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // No exception case
  __ bind(noException);

  if (!cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors);
    __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
    __ z_brne(no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    __ instr_size(Z_R1_scratch, Z_R6);
    __ z_agr(Z_R6, Z_R1_scratch);
    __ z_stg(Z_R6, offset_of_return_pc, Z_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize);
}


//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) {
  assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  unsigned int start_off = __ offset();

  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // We must save a PC from within the stub as return PC.
  // C code doesn't store the return pc where we expect it,
  // so we would run into trouble upon stack walking.
  __ get_PC(Z_R1_scratch);

  unsigned int frame_complete = __ offset();

  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);

  __ call_VM_leaf(destination, Z_thread, Z_method);


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map((int)(frame_complete-start_off), map);

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // check for pending exceptions
  Label pending;
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(pending);

  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // get the returned method
  __ get_vm_result_2(Z_method);

  // We are back to the original state on entry and ready to go.
  __ z_br(Z_R1_scratch);

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // exception pending => remove activation and forward to exception handler

  __ z_lgr(Z_R2, Z_R0); // pending_exception
  __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob; the frame size is passed in words (note the division by wordSize).
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
                                       oop_maps, true);

}

// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Only callee-saved registers are preserved (through the
// normal RegisterMap handling). If the compiler
// needs all registers to be preserved between the fault point and
// the exception handler then it must assume responsibility for that
// in AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.

// Note that we generate only this stub into a RuntimeStub, because
// it needs to be properly traversed and ignored during GC, so we
// change the meaning of the "__" macro within this method.

// Note: the routine set_pc_not_at_call_for_caller in
// SharedRuntime.cpp requires that this code be generated into a
// RuntimeStub.
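
// For context (hedged; the exact ids live in the shared SharedStubId enum):
// the shared runtime creates one such stub per implicit exception kind from
// SharedRuntime::generate_stubs(), along the lines of
//
//   generate_throw_exception(SharedStubId::throw_StackOverflowError_id,
//                            CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
//
// where runtime_entry installs the pending exception that the code below then
// forwards through StubRoutines::forward_exception_entry().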

RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  int insts_size = 256;
  int locs_size  = 0;

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer code(name, insts_size, locs_size);
  MacroAssembler* masm = new MacroAssembler(&code);
  int framesize_in_bytes;
  address start = __ pc();

  __ save_return_pc();
  framesize_in_bytes = __ push_frame_abi160(0);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of stack at this point.
  __ get_PC(Z_R1);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

  // Do the call.
  BLOCK_COMMENT("call runtime_entry");
  __ call_VM_leaf(runtime_entry, Z_thread);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending exception.
  { Label L;
    __ z_lg(Z_R0,
            in_bytes(Thread::pending_exception_offset()),
            Z_thread);
    __ z_ltgr(Z_R0, Z_R0);
    __ z_brne(L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  __ pop_frame();
  __ restore_return_pc();

  __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1);

  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  frame_complete_pc - start,
                                  framesize_in_bytes/wordSize,
                                  nullptr /*oop_maps*/, false);

  return stub;
}

//------------------------------Montgomery multiplication------------------------
//

// Subtract 0:b from carry:a. Return carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
    "LGHI   0, 8               \n" // index increment (for BRXLG)
    "LGR    1, %[c]            \n" // index limit (for BRXLG)
    "0:                        \n"
    "LG     %[c], 0(%[i],%[a]) \n"
    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG    %[c], 0(%[i],%[a]) \n"
    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
    "SLBGR  %[c], %[c]         \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1"
  );
  return carry + c;
}

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "LGHI   1, 0               \n" // r1 = 0
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], 1           \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1"
  );
}

// As above, but add twice the double-length result into the
// accumulator.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  const unsigned long zero = 0;
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1"
  );
}

// Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000",
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
static void
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    MACC(a, i, b, 0, t0, t1, t2);
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2 * len; i++) {
    int j;
    for (j = i - len + 1; j < len; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies, so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
static void
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// Value seems to be ok for other platforms, too.
#define MONTGOMERY_SQUARING_THRESHOLD 64

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  d += len;
  while (len-- > 0) {
    d--;
    unsigned long s_val = *s;
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
    Unimplemented();
#endif
    *d = s_val;
    s++;
  }
}

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use here a total of 8k bytes of stack space.
  int divisor = sizeof(unsigned long) * 4;
  guarantee(longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * sizeof (unsigned long) * 4;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)b_ints, b, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer and
  // will use here a total of 6k bytes of stack space.
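  // Worked example for the bound below (assuming sizeof(unsigned long) == 8):
  // divisor == 24, so the guarantee admits at most 8192/24 == 341 longwords,
  // i.e. roughly 8k bytes across the three scratch arrays; the 512-jint case
  // above needs only 256 longwords and therefore 3 * 256 * 8 == 6144 bytes.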
  int divisor = sizeof(unsigned long) * 3;
  guarantee(longwords <= (8192 / divisor), "must be");
  int total_allocation = longwords * sizeof (unsigned long) * 3;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
  }

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

extern "C"
int SpinPause() {
  return 0;
}

#if INCLUDE_JFR
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  if (!Continuations::enabled()) return nullptr;
  Unimplemented();
  return nullptr;
}

RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
  if (!Continuations::enabled()) return nullptr;
  Unimplemented();
  return nullptr;
}

#endif // INCLUDE_JFR
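
// ---------------------------------------------------------------------------
// Reference note for the Montgomery helpers above (illustrative only; kept in
// a comment so nothing here is compiled, and macc_ref is a hypothetical name).
//
// MACC(A, i, B, j, T0, T1, T2) accumulates the 128-bit product A[i] * B[j]
// into the 192-bit accumulator T2:T1:T0. A portable sketch using GCC's
// unsigned __int128 would look roughly like this:
//
//   static inline void macc_ref(unsigned long a, unsigned long b,
//                               unsigned long &t0, unsigned long &t1, unsigned long &t2) {
//     unsigned __int128 p = (unsigned __int128)a * b;
//     unsigned long lo = (unsigned long)p, hi = (unsigned long)(p >> 64);
//     t0 += lo;  unsigned long c = (t0 < lo);   // carry out of the low word
//     t1 += hi;  unsigned long d = (t1 < hi);   // carry out of the middle word
//     t1 += c;   d += (t1 < c);
//     t2 += d;
//   }
//
// With inv == -n[0]^-1 mod 2^64 (checked by the asserts above), the word-wise
// recurrences in montgomery_multiply()/montgomery_square() produce
// m == a * b * R^-1 (mod n) with R == 2^(64*len) (and b == a in the squaring
// case), which is the operation backing the BigInteger.montgomeryMultiply
// and BigInteger.montgomerySquare intrinsics.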