1 /*
2 * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2016, 2024 SAP SE. All rights reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "asm/macroAssembler.inline.hpp"
27 #include "code/debugInfoRec.hpp"
28 #include "code/vtableStubs.hpp"
29 #include "code/compiledIC.hpp"
30 #include "compiler/oopMap.hpp"
31 #include "gc/shared/barrierSetAssembler.hpp"
32 #include "gc/shared/gcLocker.hpp"
33 #include "interpreter/interpreter.hpp"
34 #include "interpreter/interp_masm.hpp"
35 #include "memory/resourceArea.hpp"
36 #include "nativeInst_s390.hpp"
37 #include "oops/klass.inline.hpp"
38 #include "prims/methodHandles.hpp"
39 #include "registerSaver_s390.hpp"
40 #include "runtime/jniHandles.hpp"
41 #include "runtime/safepointMechanism.hpp"
42 #include "runtime/sharedRuntime.hpp"
43 #include "runtime/signature.hpp"
44 #include "runtime/stubRoutines.hpp"
45 #include "runtime/timerTrace.hpp"
46 #include "runtime/vframeArray.hpp"
47 #include "utilities/align.hpp"
48 #include "utilities/macros.hpp"
49 #include "vmreg_s390.inline.hpp"
50 #ifdef COMPILER1
51 #include "c1/c1_Runtime1.hpp"
52 #endif
53 #ifdef COMPILER2
54 #include "opto/ad.hpp"
55 #include "opto/runtime.hpp"
56 #endif
57
// Shorthand for emitting through the MacroAssembler. In non-product builds,
// when Verbose is set, every emitted instruction is preceded by a block
// comment carrying the file name and line number; product builds avoid
// that overhead entirely.
#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

// Emit a named block comment into the generated code stream (debug aid).
#define BLOCK_COMMENT(str) __ block_comment(str)
// Bind a label and emit its name as a block comment at the bind site.
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
66
// Helper macros to build entries of the RegisterSaver_Live*Regs tables below.
// Each entry records the register kind, its hardware encoding, and its VMReg
// (the latter is used for OopMap bookkeeping).

// Entry for an integer register which is saved/restored and gets a frame slot.
#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg, regname->encoding(), regname->as_VMReg() }

// Entry for a float register which is saved/restored and gets a frame slot.
#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still get a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still get a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Entry for a vector register; only spilled when save_vectors is requested.
#define RegisterSaver_LiveVReg(regname) \
  { RegisterSaver::v_reg, regname->encoding(), regname->as_VMReg() }
85
// Save set for RegisterSet::all_registers: floats F0 and F2..F15 plus
// integer registers R2..R13. The commented-out entries (R0/R1/F1 scratch,
// R14 return pc, R15 stack pointer) get neither a save nor a frame slot.
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
126
// Save set for RegisterSet::all_integer_registers: only R2..R13 are
// actually saved; the float registers keep their slots (excluded entries)
// so this layout has the same frame size as RegisterSaver_LiveRegs.
static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: All excluded, but still they get a stack slot to get same frame size.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
167
// Save set for RegisterSet::all_registers_except_r2: identical layout to
// RegisterSaver_LiveRegs, but R2 keeps its frame slot without being
// saved/restored (excluded entry), so both layouts have the same frame size.
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
208
// Live argument registers which get spilled to the stack
// (RegisterSet::arg_registers): the float argument registers FARG1..FARG4
// followed by the integer argument registers ARG1..ARG5.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};
221
// Save set for RegisterSet::all_volatile_registers: only the volatile
// subset (F0, F2..F7 and R2..R5) is saved; the non-volatile registers are
// omitted entirely (no frame slot), see the commented-out entries below.
static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15) // stack pointer
};
262
// Vector register save set (V16..V31). Saved in addition to one of the
// GPR/FPR sets above when save_vectors is requested; each entry occupies
// v_reg_size bytes in the frame.
static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = {
  // live vector registers (optional, only these are used by C2):
  RegisterSaver_LiveVReg( Z_V16 ),
  RegisterSaver_LiveVReg( Z_V17 ),
  RegisterSaver_LiveVReg( Z_V18 ),
  RegisterSaver_LiveVReg( Z_V19 ),
  RegisterSaver_LiveVReg( Z_V20 ),
  RegisterSaver_LiveVReg( Z_V21 ),
  RegisterSaver_LiveVReg( Z_V22 ),
  RegisterSaver_LiveVReg( Z_V23 ),
  RegisterSaver_LiveVReg( Z_V24 ),
  RegisterSaver_LiveVReg( Z_V25 ),
  RegisterSaver_LiveVReg( Z_V26 ),
  RegisterSaver_LiveVReg( Z_V27 ),
  RegisterSaver_LiveVReg( Z_V28 ),
  RegisterSaver_LiveVReg( Z_V29 ),
  RegisterSaver_LiveVReg( Z_V30 ),
  RegisterSaver_LiveVReg( Z_V31 )
};
282
283 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
284 int reg_space = -1;
285 switch (reg_set) {
286 case all_registers: reg_space = sizeof(RegisterSaver_LiveRegs); break;
287 case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
288 case all_integer_registers: reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
289 case all_volatile_registers: reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
290 case arg_registers: reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
291 default: ShouldNotReachHere();
292 }
293 return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
294 }
295
296 int RegisterSaver::calculate_vregstosave_num() {
297 return (sizeof(RegisterSaver_LiveVRegs) / sizeof(RegisterSaver::LiveRegType));
298 }
299
300 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) {
301 const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
302 return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size;
303 }
304
305
306 // return_pc: Specify the register that should be stored as the return pc in the current frame.
307 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) {
308 // Record volatile registers as callee-save values in an OopMap so
309 // their save locations will be propagated to the caller frame's
310 // RegisterMap during StackFrameStream construction (needed for
311 // deoptimization; see compiledVFrame::create_stack_value).
312
313 // Calculate frame size.
314 const int frame_size_in_bytes = live_reg_frame_size(reg_set, save_vectors);
315 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
316 const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
317 const int register_save_offset = frame_size_in_bytes - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);
318
319 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
320 OopMap* map = new OopMap(frame_size_in_slots, 0);
321
322 int regstosave_num = 0;
323 const RegisterSaver::LiveRegType* live_regs = nullptr;
324
325 switch (reg_set) {
326 case all_registers:
327 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
328 live_regs = RegisterSaver_LiveRegs;
329 break;
330 case all_registers_except_r2:
331 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
332 live_regs = RegisterSaver_LiveRegsWithoutR2;
333 break;
334 case all_integer_registers:
335 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
336 live_regs = RegisterSaver_LiveIntRegs;
337 break;
338 case all_volatile_registers:
339 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
340 live_regs = RegisterSaver_LiveVolatileRegs;
341 break;
342 case arg_registers:
343 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
344 live_regs = RegisterSaver_LiveArgRegs;
345 break;
346 default: ShouldNotReachHere();
347 }
348
349 // Save return pc in old frame.
350 __ save_return_pc(return_pc);
351
352 // Push a new frame (includes stack linkage).
353 // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
354 // illegally used to pass parameters by RangeCheckStub::emit_code().
355 __ push_frame(frame_size_in_bytes, return_pc);
356 // We have to restore return_pc right away.
357 // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
358 // Nobody else knows which register we saved.
359 __ z_lg(return_pc, _z_common_abi(return_pc) + frame_size_in_bytes, Z_SP);
360
361 // Register save area in new frame starts above z_abi_160 area.
362 int offset = register_save_offset;
363
364 Register first = noreg;
365 Register last = noreg;
366 int first_offset = -1;
367 bool float_spilled = false;
368
369 for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
370 int reg_num = live_regs[i].reg_num;
371 int reg_type = live_regs[i].reg_type;
372
373 switch (reg_type) {
374 case RegisterSaver::int_reg: {
375 Register reg = as_Register(reg_num);
376 if (last != reg->predecessor()) {
377 if (first != noreg) {
378 __ z_stmg(first, last, first_offset, Z_SP);
379 }
380 first = reg;
381 first_offset = offset;
382 DEBUG_ONLY(float_spilled = false);
383 }
384 last = reg;
385 assert(last != Z_R0, "r0 would require special treatment");
386 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
387 break;
388 }
389
390 case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
391 continue; // Continue with next loop iteration.
392
393 case RegisterSaver::float_reg: {
394 FloatRegister freg = as_FloatRegister(reg_num);
395 __ z_std(freg, offset, Z_SP);
396 DEBUG_ONLY(float_spilled = true);
397 break;
398 }
399
400 default:
401 ShouldNotReachHere();
402 break;
403 }
404
405 // Second set_callee_saved is really a waste but we'll keep things as they were for now
406 map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
407 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
408 }
409 assert(first != noreg, "Should spill at least one int reg.");
410 __ z_stmg(first, last, first_offset, Z_SP);
411
412 for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
413 int reg_num = RegisterSaver_LiveVRegs[i].reg_num;
414
415 __ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset));
416
417 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
418 RegisterSaver_LiveVRegs[i].vmreg);
419 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2),
420 RegisterSaver_LiveVRegs[i].vmreg->next());
421 map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2),
422 RegisterSaver_LiveVRegs[i].vmreg->next(2));
423 map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2),
424 RegisterSaver_LiveVRegs[i].vmreg->next(3));
425 }
426
427 assert(offset == frame_size_in_bytes, "consistency check");
428
429 // And we're done.
430 return map;
431 }
432
433
434 // Generate the OopMap (again, regs where saved before).
435 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
436 // Calculate frame size.
437 const int frame_size_in_bytes = live_reg_frame_size(reg_set);
438 const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
439 const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
440
441 // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
442 OopMap* map = new OopMap(frame_size_in_slots, 0);
443
444 int regstosave_num = 0;
445 const RegisterSaver::LiveRegType* live_regs = nullptr;
446
447 switch (reg_set) {
448 case all_registers:
449 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
450 live_regs = RegisterSaver_LiveRegs;
451 break;
452 case all_registers_except_r2:
453 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
454 live_regs = RegisterSaver_LiveRegsWithoutR2;
455 break;
456 case all_integer_registers:
457 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
458 live_regs = RegisterSaver_LiveIntRegs;
459 break;
460 case all_volatile_registers:
461 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
462 live_regs = RegisterSaver_LiveVolatileRegs;
463 break;
464 case arg_registers:
465 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
466 live_regs = RegisterSaver_LiveArgRegs;
467 break;
468 default: ShouldNotReachHere();
469 }
470
471 // Register save area in new frame starts above z_abi_160 area.
472 int offset = register_save_offset;
473 for (int i = 0; i < regstosave_num; i++) {
474 if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
475 map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
476 map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
477 }
478 offset += reg_size;
479 }
480 #ifdef ASSERT
481 assert(offset == frame_size_in_bytes, "consistency check");
482 #endif
483 return map;
484 }
485
486
487 // Pop the current frame and restore all the registers that we saved.
488 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) {
489 int offset;
490 const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
491 const int register_save_offset = live_reg_frame_size(reg_set, save_vectors) - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);
492
493 Register first = noreg;
494 Register last = noreg;
495 int first_offset = -1;
496 bool float_spilled = false;
497
498 int regstosave_num = 0;
499 const RegisterSaver::LiveRegType* live_regs = nullptr;
500
501 switch (reg_set) {
502 case all_registers:
503 regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
504 live_regs = RegisterSaver_LiveRegs;
505 break;
506 case all_registers_except_r2:
507 regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
508 live_regs = RegisterSaver_LiveRegsWithoutR2;
509 break;
510 case all_integer_registers:
511 regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
512 live_regs = RegisterSaver_LiveIntRegs;
513 break;
514 case all_volatile_registers:
515 regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
516 live_regs = RegisterSaver_LiveVolatileRegs;
517 break;
518 case arg_registers:
519 regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
520 live_regs = RegisterSaver_LiveArgRegs;
521 break;
522 default: ShouldNotReachHere();
523 }
524
525 // Restore all registers (ints and floats).
526
527 // Register save area in new frame starts above z_abi_160 area.
528 offset = register_save_offset;
529
530 for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
531 int reg_num = live_regs[i].reg_num;
532 int reg_type = live_regs[i].reg_type;
533
534 switch (reg_type) {
535 case RegisterSaver::excluded_reg:
536 continue; // Continue with next loop iteration.
537
538 case RegisterSaver::int_reg: {
539 Register reg = as_Register(reg_num);
540 if (last != reg->predecessor()) {
541 if (first != noreg) {
542 __ z_lmg(first, last, first_offset, Z_SP);
543 }
544 first = reg;
545 first_offset = offset;
546 DEBUG_ONLY(float_spilled = false);
547 }
548 last = reg;
549 assert(last != Z_R0, "r0 would require special treatment");
550 assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
551 break;
552 }
553
554 case RegisterSaver::float_reg: {
555 FloatRegister freg = as_FloatRegister(reg_num);
556 __ z_ld(freg, offset, Z_SP);
557 DEBUG_ONLY(float_spilled = true);
558 break;
559 }
560
561 default:
562 ShouldNotReachHere();
563 }
564 }
565 assert(first != noreg, "Should spill at least one int reg.");
566 __ z_lmg(first, last, first_offset, Z_SP);
567
568 for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
569 int reg_num = RegisterSaver_LiveVRegs[i].reg_num;
570
571 __ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset));
572 }
573
574 // Pop the frame.
575 __ pop_frame();
576
577 // Restore the flags.
578 __ restore_return_pc();
579 }
580
581
// Pop the current frame and restore the registers that might be holding a result.
// Walks the all_registers save layout and reloads only Z_RET / Z_FRET from
// their save slots; all other slots are skipped.
// NOTE(review): no pop_frame is emitted here — only the result registers are
// reloaded; confirm the caller pops the frame itself.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  const int regstosave_num = sizeof(RegisterSaver_LiveRegs) /
                             sizeof(RegisterSaver::LiveRegType);
  // Save area starts above the z_abi_160 area (same layout as save_live_registers).
  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);

  // Restore all result registers (ints and floats).
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == Z_RET) { // int result_reg
          __ z_lg(as_Register(reg_num), offset, Z_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
  assert(offset == live_reg_frame_size(all_registers), "consistency check");
}
614
615 // ---------------------------------------------------------------------------
// Spill the native call result (int result in Z_RET or float result in
// Z_FRET, selected by ret_type) to the stack slot at
// Z_SP + frame_slots * stack_slot_size, so it survives a subsequent call.
void SharedRuntime::save_native_result(MacroAssembler * masm,
                                       BasicType ret_type,
                                       int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN: // Save shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ reg2mem_opt(Z_RET, memaddr, false); // 'false' presumably selects the 32-bit store form -- confirm against reg2mem_opt.
      break;
    case T_OBJECT: // Save pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ reg2mem_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ freg2mem_opt(Z_FRET, memaddr, false); // 32-bit float store variant.
      break;
    case T_DOUBLE:
      __ freg2mem_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}
647
// Reload the native call result (into Z_RET or Z_FRET, selected by
// ret_type) from the stack slot written by save_native_result; must use
// the same frame_slots value.
void SharedRuntime::restore_native_result(MacroAssembler *masm,
                                          BasicType ret_type,
                                          int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN: // Restore shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ mem2reg_opt(Z_RET, memaddr, false); // 'false' presumably selects the 32-bit load form -- confirm against mem2reg_opt.
      break;
    case T_OBJECT: // Restore pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ mem2reg_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ mem2freg_opt(Z_FRET, memaddr, false); // 32-bit float load variant.
      break;
    case T_DOUBLE:
      __ mem2freg_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}
679
680 // ---------------------------------------------------------------------------
681 // Read the array of BasicTypes from a signature, and compute where the
682 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
683 // quantities. Values less than VMRegImpl::stack0 are registers, those above
684 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
685 // as framesizes are fixed.
686 // VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
688 // up to Register::number_of_registers are the 64-bit integer registers.
689
690 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
691 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
692 // units regardless of build.
693
694 // The Java calling convention is a "shifted" version of the C ABI.
695 // By skipping the first C ABI register we can call non-static jni methods
696 // with small numbers of arguments without having to shuffle the arguments
697 // at all. Since we control the java ABI we ought to at least get some
698 // advantage out of it.
699 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
700 VMRegPair *regs,
701 int total_args_passed) {
702 // c2c calling conventions for compiled-compiled calls.
703
704 // An int/float occupies 1 slot here.
705 const int inc_stk_for_intfloat = 1; // 1 slots for ints and floats.
706 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
707
708 const VMReg z_iarg_reg[5] = {
709 Z_R2->as_VMReg(),
710 Z_R3->as_VMReg(),
711 Z_R4->as_VMReg(),
712 Z_R5->as_VMReg(),
713 Z_R6->as_VMReg()
714 };
715 const VMReg z_farg_reg[4] = {
716 Z_F0->as_VMReg(),
717 Z_F2->as_VMReg(),
718 Z_F4->as_VMReg(),
719 Z_F6->as_VMReg()
720 };
721 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
722 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
723
724 assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
725 assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
726
727 int i;
728 int stk = 0;
729 int ireg = 0;
730 int freg = 0;
731
732 for (int i = 0; i < total_args_passed; ++i) {
733 switch (sig_bt[i]) {
734 case T_BOOLEAN:
735 case T_CHAR:
736 case T_BYTE:
737 case T_SHORT:
738 case T_INT:
739 if (ireg < z_num_iarg_registers) {
740 // Put int/ptr in register.
741 regs[i].set1(z_iarg_reg[ireg]);
742 ++ireg;
743 } else {
744 // Put int/ptr on stack.
745 regs[i].set1(VMRegImpl::stack2reg(stk));
746 stk += inc_stk_for_intfloat;
747 }
748 break;
749 case T_LONG:
750 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
751 if (ireg < z_num_iarg_registers) {
752 // Put long in register.
753 regs[i].set2(z_iarg_reg[ireg]);
754 ++ireg;
755 } else {
756 // Put long on stack and align to 2 slots.
757 if (stk & 0x1) { ++stk; }
758 regs[i].set2(VMRegImpl::stack2reg(stk));
759 stk += inc_stk_for_longdouble;
760 }
761 break;
762 case T_OBJECT:
763 case T_ARRAY:
764 case T_ADDRESS:
765 if (ireg < z_num_iarg_registers) {
766 // Put ptr in register.
767 regs[i].set2(z_iarg_reg[ireg]);
768 ++ireg;
769 } else {
770 // Put ptr on stack and align to 2 slots, because
771 // "64-bit pointers record oop-ishness on 2 aligned adjacent
772 // registers." (see OopFlow::build_oop_map).
773 if (stk & 0x1) { ++stk; }
774 regs[i].set2(VMRegImpl::stack2reg(stk));
775 stk += inc_stk_for_longdouble;
776 }
777 break;
778 case T_FLOAT:
779 if (freg < z_num_farg_registers) {
780 // Put float in register.
781 regs[i].set1(z_farg_reg[freg]);
782 ++freg;
783 } else {
784 // Put float on stack.
785 regs[i].set1(VMRegImpl::stack2reg(stk));
786 stk += inc_stk_for_intfloat;
787 }
788 break;
789 case T_DOUBLE:
790 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
791 if (freg < z_num_farg_registers) {
792 // Put double in register.
793 regs[i].set2(z_farg_reg[freg]);
794 ++freg;
795 } else {
796 // Put double on stack and align to 2 slots.
797 if (stk & 0x1) { ++stk; }
798 regs[i].set2(VMRegImpl::stack2reg(stk));
799 stk += inc_stk_for_longdouble;
800 }
801 break;
802 case T_VOID:
803 assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
804 // Do not count halves.
805 regs[i].set_bad();
806 break;
807 default:
808 ShouldNotReachHere();
809 }
810 }
811 return stk;
812 }
813
814 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
815 VMRegPair *regs,
816 int total_args_passed) {
817
818 // Calling conventions for C runtime calls and calls to JNI native methods.
819 const VMReg z_iarg_reg[5] = {
820 Z_R2->as_VMReg(),
821 Z_R3->as_VMReg(),
822 Z_R4->as_VMReg(),
823 Z_R5->as_VMReg(),
824 Z_R6->as_VMReg()
825 };
826 const VMReg z_farg_reg[4] = {
827 Z_F0->as_VMReg(),
828 Z_F2->as_VMReg(),
829 Z_F4->as_VMReg(),
830 Z_F6->as_VMReg()
831 };
832 const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
833 const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
834
835 // Check calling conventions consistency.
836 assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
837 assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
838
839 // Avoid passing C arguments in the wrong stack slots.
840
841 // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
842 // 2 such slots, like 64 bit values do.
843 const int inc_stk_for_intfloat = 2; // 2 slots for ints and floats.
844 const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
845
846 int i;
847 // Leave room for C-compatible ABI
848 int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
849 int freg = 0;
850 int ireg = 0;
851
852 // We put the first 5 arguments into registers and the rest on the
853 // stack. Float arguments are already in their argument registers
854 // due to c2c calling conventions (see calling_convention).
855 for (int i = 0; i < total_args_passed; ++i) {
856 switch (sig_bt[i]) {
857 case T_BOOLEAN:
858 case T_CHAR:
859 case T_BYTE:
860 case T_SHORT:
861 case T_INT:
862 // Fall through, handle as long.
863 case T_LONG:
864 case T_OBJECT:
865 case T_ARRAY:
866 case T_ADDRESS:
867 case T_METADATA:
868 // Oops are already boxed if required (JNI).
869 if (ireg < z_num_iarg_registers) {
870 regs[i].set2(z_iarg_reg[ireg]);
871 ++ireg;
872 } else {
873 regs[i].set2(VMRegImpl::stack2reg(stk));
874 stk += inc_stk_for_longdouble;
875 }
876 break;
877 case T_FLOAT:
878 if (freg < z_num_farg_registers) {
879 regs[i].set1(z_farg_reg[freg]);
880 ++freg;
881 } else {
882 regs[i].set1(VMRegImpl::stack2reg(stk+1));
883 stk += inc_stk_for_intfloat;
884 }
885 break;
886 case T_DOUBLE:
887 assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
888 if (freg < z_num_farg_registers) {
889 regs[i].set2(z_farg_reg[freg]);
890 ++freg;
891 } else {
892 // Put double on stack.
893 regs[i].set2(VMRegImpl::stack2reg(stk));
894 stk += inc_stk_for_longdouble;
895 }
896 break;
897 case T_VOID:
898 // Do not count halves.
899 regs[i].set_bad();
900 break;
901 default:
902 ShouldNotReachHere();
903 }
904 }
905 return align_up(stk, 2);
906 }
907
// Vector argument passing is not supported on z/Architecture;
// this convention is never queried here and traps if reached.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
914
915 ////////////////////////////////////////////////////////////////////////
916 //
917 // Argument shufflers
918 //
919 ////////////////////////////////////////////////////////////////////////
920
921 //----------------------------------------------------------------------
922 // The java_calling_convention describes stack locations as ideal slots on
923 // a frame with no abi restrictions. Since we must observe abi restrictions
924 // (like the placement of the register window) the slots must be biased by
925 // the following value.
926 //----------------------------------------------------------------------
927 static int reg2slot(VMReg r) {
928 return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
929 }
930
931 static int reg2offset(VMReg r) {
932 return reg2slot(r) * VMRegImpl::stack_slot_size;
933 }
934
// Emit verification code for every reference-typed argument (debug aid,
// active only with -XX:+VerifyOops). Stack-passed oops are loaded into
// Z_R0_scratch first; register-passed oops are verified in place.
static void verify_oop_args(MacroAssembler *masm,
                            int total_args_passed,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  if (!VerifyOops) { return; }

  for (int i = 0; i < total_args_passed; i++) {
    if (is_reference_type(sig_bt[i])) {
      VMReg r = regs[i].first();
      assert(r->is_valid(), "bad oop arg");

      if (r->is_stack()) {
        // Load the oop from its caller-frame stack slot.
        // NOTE(review): the extra wordSize bias presumably accounts for the
        // frame layout at this point — confirm against the frame definition.
        __ z_lg(Z_R0_scratch,
                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
        __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
      } else {
        __ verify_oop(r->as_Register(), FILE_AND_LINE);
      }
    }
  }
}
956
// Generate the entry for a method-handle intrinsic (invokeBasic, the
// linkTo* family, or linkToNative). These "native" methods do not get a
// JNI wrapper; instead we load the receiver and/or the trailing
// MemberName/NativeEntryPoint argument into known-free registers and
// delegate the actual jump to MethodHandles::generate_method_handle_dispatch.
static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool has_receiver = false;
  Register receiver_reg = noreg;
  int member_arg_pos = -1;
  Register member_reg = noreg;
  int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (special_dispatch == vmIntrinsics::_linkToNative) {
    member_arg_pos = total_args_passed - 1;  // trailing NativeEntryPoint argument
    member_reg = Z_R9;                       // known to be free at this point
  } else {
    // Only _invokeBasic remains: it has a receiver but no trailing argument.
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic,
              "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}
1026
1027 ////////////////////////////////////////////////////////////////////////
1028 //
1029 // Argument shufflers
1030 //
1031 ////////////////////////////////////////////////////////////////////////
1032
// Is the size of a vector (in bytes) bigger than the size saved by default?
// 8 byte registers are saved by default on z/Architecture.
1035 bool SharedRuntime::is_wide_vector(int size) {
1036 // Note, MaxVectorSize == 8/16 on this platform.
1037 assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size);
1038 return size > 8;
1039 }
1040
1041 //----------------------------------------------------------------------
1042 // An oop arg. Must pass a handle not the oop itself
1043 //----------------------------------------------------------------------
// Move one oop argument from its Java location (src) to its C location
// (dst), converting it into a handle (pointer to the stack slot holding
// the oop), as JNI requires. A null oop becomes a null handle. The oop's
// stack slot is recorded in the OopMap; for the receiver, its frame
// offset is additionally returned via receiver_offset.
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label skip;
    int slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    // The oop stays in the caller's frame; record that slot in the map.
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    // Handle = address of the caller-frame slot holding the oop.
    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a null handle if oop is null.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    // Each inbound arg register has a reserved slot in the oopHandle area;
    // pick it by the register's distance from Z_ARG1.
    int oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop is null, use a null handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}
1109
1110 //----------------------------------------------------------------------
1111 // A float arg. May have to do float reg to int reg conversion
1112 //----------------------------------------------------------------------
1113 static void float_move(MacroAssembler *masm,
1114 VMRegPair src,
1115 VMRegPair dst,
1116 int framesize_in_slots,
1117 int workspace_slot_offset) {
1118 int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1119 int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1120
1121 // We do not accept an argument in a VMRegPair to be spread over two slots,
1122 // no matter what physical location (reg or stack) the slots may have.
1123 // We just check for the unaccepted slot to be invalid.
1124 assert(!src.second()->is_valid(), "float in arg spread over two slots");
1125 assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1126
1127 if (src.first()->is_stack()) {
1128 if (dst.first()->is_stack()) {
1129 // stack -> stack. The easiest of the bunch.
1130 __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1131 Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1132 } else {
1133 // stack to reg
1134 Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1135 if (dst.first()->is_Register()) {
1136 __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1137 } else {
1138 __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1139 }
1140 }
1141 } else if (src.first()->is_Register()) {
1142 if (dst.first()->is_stack()) {
1143 // gpr -> stack
1144 __ reg2mem_opt(src.first()->as_Register(),
1145 Address(Z_SP, reg2offset(dst.first()), false ));
1146 } else {
1147 if (dst.first()->is_Register()) {
1148 // gpr -> gpr
1149 __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1150 src.first()->as_Register(), T_INT);
1151 } else {
1152 if (VM_Version::has_FPSupportEnhancements()) {
1153 // gpr -> fpr. Exploit z10 capability of direct transfer.
1154 __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1155 } else {
1156 // gpr -> fpr. Use work space on stack to transfer data.
1157 Address stackaddr(Z_SP, workspace_offset);
1158
1159 __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1160 __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1161 }
1162 }
1163 }
1164 } else {
1165 if (dst.first()->is_stack()) {
1166 // fpr -> stack
1167 __ freg2mem_opt(src.first()->as_FloatRegister(),
1168 Address(Z_SP, reg2offset(dst.first())), false);
1169 } else {
1170 if (dst.first()->is_Register()) {
1171 if (VM_Version::has_FPSupportEnhancements()) {
1172 // fpr -> gpr.
1173 __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1174 } else {
1175 // fpr -> gpr. Use work space on stack to transfer data.
1176 Address stackaddr(Z_SP, workspace_offset);
1177
1178 __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1179 __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1180 }
1181 } else {
1182 // fpr -> fpr
1183 __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1184 src.first()->as_FloatRegister(), T_FLOAT);
1185 }
1186 }
1187 }
1188 }
1189
1190 //----------------------------------------------------------------------
1191 // A double arg. May have to do double reg to long reg conversion
1192 //----------------------------------------------------------------------
// Move one double argument from its Java location (src) to its C location
// (dst). Handles all stack/GPR/FPR combinations; GPR<->FPR transfers use
// the direct z10 instructions when available, otherwise the stack work
// space (at workspace_slot_offset) as a bounce buffer.
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}
1267
1268 //----------------------------------------------------------------------
1269 // A long arg.
1270 //----------------------------------------------------------------------
// Move one long argument from its Java location (src) to its C location
// (dst). Longs live in GPRs or 64-bit stack slots only; all four
// stack/register combinations are handled.
static void long_move(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
    } else {
      // stack to reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ mem2reg_opt(dst.first()->as_Register(),
                     Address(Z_SP, reg2offset(src.first()) + frame_offset));
    }
  } else {
    // reg to reg
    assert(src.first()->is_Register(), "long src value must be in GPR");
    if (dst.first()->is_stack()) {
      // reg -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      // reg -> reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ move_reg_if_needed(dst.first()->as_Register(),
                            T_LONG, src.first()->as_Register(), T_LONG);
    }
  }
}
1303
1304
1305 //----------------------------------------------------------------------
1306 // A int-like arg.
1307 //----------------------------------------------------------------------
1308 // On z/Architecture we will store integer like items to the stack as 64 bit
1309 // items, according to the z/Architecture ABI, even though Java would only store
1310 // 32 bits for a parameter.
1311 // We do sign extension for all base types. That is ok since the only
1312 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1313 // Sign extension 32->64 bit will thus not affect the value.
1314 //----------------------------------------------------------------------
// Move one int-like argument from its Java location (src) to its C
// location (dst), sign-extending 32 -> 64 bit along the way (see the
// banner above for why sign extension is always safe here).
static void move32_64(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
    if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
      // Bounce through Z_R0_scratch to widen the value.
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
      __ reg2mem_opt(Z_R0_scratch, firstaddr);
    } else {
      // stack -> reg, sign extended
      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
    }
  } else {
    if (dst.first()->is_stack()) {
      // reg -> stack, sign extended
      // Note: sign-extends the source register in place before storing,
      // i.e. the (argument) source register is clobbered here.
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
    } else {
      // reg -> reg, sign extended
      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
1344
1345 //----------------------------------------------------------------------
1346 // Wrap a JNI call.
1347 //----------------------------------------------------------------------
1348 #undef USE_RESIZE_FRAME
1349 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1350 const methodHandle& method,
1351 int compile_id,
1352 BasicType *in_sig_bt,
1353 VMRegPair *in_regs,
1354 BasicType ret_type) {
1355 int total_in_args = method->size_of_parameters();
1356 if (method->is_method_handle_intrinsic()) {
1357 vmIntrinsics::ID iid = method->intrinsic_id();
1358 intptr_t start = (intptr_t) __ pc();
1359 int vep_offset = ((intptr_t) __ pc()) - start;
1360
1361 gen_special_dispatch(masm, total_in_args,
1362 method->intrinsic_id(), in_sig_bt, in_regs);
1363
1364 int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1365
1366 __ flush();
1367
1368 int stack_slots = SharedRuntime::out_preserve_stack_slots(); // No out slots at all, actually.
1369
1370 return nmethod::new_native_nmethod(method,
1371 compile_id,
1372 masm->code(),
1373 vep_offset,
1374 frame_complete,
1375 stack_slots / VMRegImpl::slots_per_word,
1376 in_ByteSize(-1),
1377 in_ByteSize(-1),
1378 (OopMapSet *) nullptr);
1379 }
1380
1381
1382 ///////////////////////////////////////////////////////////////////////
1383 //
1384 // Precalculations before generating any code
1385 //
1386 ///////////////////////////////////////////////////////////////////////
1387
1388 address native_func = method->native_function();
1389 assert(native_func != nullptr, "must have function");
1390
1391 //---------------------------------------------------------------------
1392 // We have received a description of where all the java args are located
1393 // on entry to the wrapper. We need to convert these args to where
1394 // the jni function will expect them. To figure out where they go
1395 // we convert the java signature to a C signature by inserting
1396 // the hidden arguments as arg[0] and possibly arg[1] (static method).
1397 //
1398 // The first hidden argument arg[0] is a pointer to the JNI environment.
1399 // It is generated for every call.
1400 // The second argument arg[1] to the JNI call, which is hidden for static
1401 // methods, is the boxed lock object. For static calls, the lock object
1402 // is the static method itself. The oop is constructed here. for instance
1403 // calls, the lock is performed on the object itself, the pointer of
1404 // which is passed as the first visible argument.
1405 //---------------------------------------------------------------------
1406
1407 // Additionally, on z/Architecture we must convert integers
1408 // to longs in the C signature. We do this in advance in order to have
1409 // no trouble with indexes into the bt-arrays.
1410 // So convert the signature and registers now, and adjust the total number
1411 // of in-arguments accordingly.
1412 bool method_is_static = method->is_static();
1413 int total_c_args = total_in_args + (method_is_static ? 2 : 1);
1414
1415 BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1416 VMRegPair *out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1417
1418 // Create the signature for the C call:
1419 // 1) add the JNIEnv*
1420 // 2) add the class if the method is static
1421 // 3) copy the rest of the incoming signature (shifted by the number of
1422 // hidden arguments)
1423
1424 int argc = 0;
1425 out_sig_bt[argc++] = T_ADDRESS;
1426 if (method->is_static()) {
1427 out_sig_bt[argc++] = T_OBJECT;
1428 }
1429
1430 for (int i = 0; i < total_in_args; i++) {
1431 out_sig_bt[argc++] = in_sig_bt[i];
1432 }
1433
1434 ///////////////////////////////////////////////////////////////////////
1435 // Now figure out where the args must be stored and how much stack space
1436 // they require (neglecting out_preserve_stack_slots but providing space
1437 // for storing the first five register arguments).
1438 // It's weird, see int_stk_helper.
1439 ///////////////////////////////////////////////////////////////////////
1440
1441 //---------------------------------------------------------------------
1442 // Compute framesize for the wrapper.
1443 //
1444 // - We need to handlize all oops passed in registers.
1445 // - We must create space for them here that is disjoint from the save area.
1446 // - We always just allocate 5 words for storing down these object.
1447 // This allows us to simply record the base and use the Ireg number to
1448 // decide which slot to use.
1449 // - Note that the reg number used to index the stack slot is the inbound
1450 // number, not the outbound number.
1451 // - We must shuffle args to match the native convention,
1452 // and to include var-args space.
1453 //---------------------------------------------------------------------
1454
1455 //---------------------------------------------------------------------
1456 // Calculate the total number of stack slots we will need:
1457 // - 1) abi requirements
1458 // - 2) outgoing args
1459 // - 3) space for inbound oop handle area
1460 // - 4) space for handlizing a klass if static method
1461 // - 5) space for a lock if synchronized method
1462 // - 6) workspace (save rtn value, int<->float reg moves, ...)
1463 // - 7) filler slots for alignment
1464 //---------------------------------------------------------------------
1465 // Here is how the space we have allocated will look like.
1466 // Since we use resize_frame, we do not create a new stack frame,
1467 // but just extend the one we got with our own data area.
1468 //
1469 // If an offset or pointer name points to a separator line, it is
1470 // assumed that addressing with offset 0 selects storage starting
1471 // at the first byte above the separator line.
1472 //
1473 //
1474 // ... ...
1475 // | caller's frame |
1476 // FP-> |---------------------|
1477 // | filler slots, if any|
1478 // 7| #slots == mult of 2 |
1479 // |---------------------|
1480 // | work space |
1481 // 6| 2 slots = 8 bytes |
1482 // |---------------------|
1483 // 5| lock box (if sync) |
1484 // |---------------------| <- lock_slot_offset
1485 // 4| klass (if static) |
1486 // |---------------------| <- klass_slot_offset
1487 // 3| oopHandle area |
1488 // | |
1489 // | |
1490 // |---------------------| <- oop_handle_offset
1491 // 2| outbound memory |
1492 // ... ...
1493 // | based arguments |
1494 // |---------------------|
1495 // | vararg |
1496 // ... ...
1497 // | area |
1498 // |---------------------| <- out_arg_slot_offset
1499 // 1| out_preserved_slots |
1500 // ... ...
1501 // | (z_abi spec) |
1502 // SP-> |---------------------| <- FP_slot_offset (back chain)
1503 // ... ...
1504 //
1505 //---------------------------------------------------------------------
1506
1507 // *_slot_offset indicates offset from SP in #stack slots
1508 // *_offset indicates offset from SP in #bytes
1509
1510 int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2
1511 SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1512
1513 // Now the space for the inbound oop handle area.
1514 int total_save_slots = Register::number_of_arg_registers * VMRegImpl::slots_per_word;
1515
1516 int oop_handle_slot_offset = stack_slots;
1517 stack_slots += total_save_slots; // 3)
1518
1519 int klass_slot_offset = 0;
1520 int klass_offset = -1;
1521 if (method_is_static) { // 4)
1522 klass_slot_offset = stack_slots;
1523 klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1524 stack_slots += VMRegImpl::slots_per_word;
1525 }
1526
1527 int lock_slot_offset = 0;
1528 int lock_offset = -1;
1529 if (method->is_synchronized()) { // 5)
1530 lock_slot_offset = stack_slots;
1531 lock_offset = lock_slot_offset * VMRegImpl::stack_slot_size;
1532 stack_slots += VMRegImpl::slots_per_word;
1533 }
1534
1535 int workspace_slot_offset= stack_slots; // 6)
1536 stack_slots += 2;
1537
1538 // Now compute actual number of stack words we need.
1539 // Round to align stack properly.
1540 stack_slots = align_up(stack_slots, // 7)
1541 frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1542 int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1543
1544
1545 ///////////////////////////////////////////////////////////////////////
1546 // Now we can start generating code
1547 ///////////////////////////////////////////////////////////////////////
1548
1549 unsigned int wrapper_CodeStart = __ offset();
1550 unsigned int wrapper_UEPStart;
1551 unsigned int wrapper_VEPStart;
1552 unsigned int wrapper_FrameDone;
1553 unsigned int wrapper_CRegsSet;
1554 Label handle_pending_exception;
1555
1556 //---------------------------------------------------------------------
1557 // Unverified entry point (UEP)
1558 //---------------------------------------------------------------------
1559
1560 // check ic: object class <-> cached class
1561 if (!method_is_static) {
1562 wrapper_UEPStart = __ ic_check(CodeEntryAlignment /* end_alignment */);
1563 }
1564
1565 //---------------------------------------------------------------------
1566 // Verified entry point (VEP)
1567 //---------------------------------------------------------------------
1568 wrapper_VEPStart = __ offset();
1569
1570 if (method->needs_clinit_barrier()) {
1571 assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1572 Label L_skip_barrier;
1573 Register klass = Z_R1_scratch;
1574 // Notify OOP recorder (don't need the relocation)
1575 AddressLiteral md = __ constant_metadata_address(method->method_holder());
1576 __ load_const_optimized(klass, md.value());
1577 __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1578
1579 __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1580 __ z_br(klass);
1581
1582 __ bind(L_skip_barrier);
1583 }
1584
1585 __ save_return_pc();
1586 __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
1587 #ifndef USE_RESIZE_FRAME
1588 __ push_frame(frame_size_in_bytes); // Create a new frame for the wrapper.
1589 #else
1590 __ resize_frame(-frame_size_in_bytes, Z_R0_scratch); // No new frame for the wrapper.
1591 // Just resize the existing one.
1592 #endif
1593
1594 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1595 bs->nmethod_entry_barrier(masm);
1596
1597 wrapper_FrameDone = __ offset();
1598
1599 // Native nmethod wrappers never take possession of the oop arguments.
1600 // So the caller will gc the arguments.
1601 // The only thing we need an oopMap for is if the call is static.
1602 //
1603 // An OopMap for lock (and class if static), and one for the VM call itself
1604 OopMapSet *oop_maps = new OopMapSet();
1605 OopMap *map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1606
1607 //////////////////////////////////////////////////////////////////////
1608 //
1609 // The Grand Shuffle
1610 //
1611 //////////////////////////////////////////////////////////////////////
1612 //
1613 // We immediately shuffle the arguments so that for any vm call we have
1614 // to make from here on out (sync slow path, jvmti, etc.) we will have
1615 // captured the oops from our caller and have a valid oopMap for them.
1616 //
1617 //--------------------------------------------------------------------
1618 // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1619 // (derived from JavaThread* which is in Z_thread) and, if static,
1620 // the class mirror instead of a receiver. This pretty much guarantees that
1621 // register layout will not match. We ignore these extra arguments during
1622 // the shuffle. The shuffle is described by the two calling convention
1623 // vectors we have in our possession. We simply walk the java vector to
1624 // get the source locations and the c vector to get the destinations.
1625 //
1626 // This is a trick. We double the stack slots so we can claim
1627 // the oops in the caller's frame. Since we are sure to have
1628 // more args than the caller doubling is enough to make
1629 // sure we can capture all the incoming oop args from the caller.
1630 //--------------------------------------------------------------------
1631
1632 // Record sp-based slot for receiver on stack for non-static methods.
1633 int receiver_offset = -1;
1634
1635 //--------------------------------------------------------------------
1636 // We move the arguments backwards because the floating point registers
1637 // destination will always be to a register with a greater or equal
1638 // register number or the stack.
1639 // jix is the index of the incoming Java arguments.
1640 // cix is the index of the outgoing C arguments.
1641 //--------------------------------------------------------------------
1642
1643 #ifdef ASSERT
1644 bool reg_destroyed[Register::number_of_registers];
1645 bool freg_destroyed[FloatRegister::number_of_registers];
1646 for (int r = 0; r < Register::number_of_registers; r++) {
1647 reg_destroyed[r] = false;
1648 }
1649 for (int f = 0; f < FloatRegister::number_of_registers; f++) {
1650 freg_destroyed[f] = false;
1651 }
1652 #endif // ASSERT
1653
1654 for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1655 #ifdef ASSERT
1656 if (in_regs[jix].first()->is_Register()) {
1657 assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1658 } else {
1659 if (in_regs[jix].first()->is_FloatRegister()) {
1660 assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1661 }
1662 }
1663 if (out_regs[cix].first()->is_Register()) {
1664 reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1665 } else {
1666 if (out_regs[cix].first()->is_FloatRegister()) {
1667 freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1668 }
1669 }
1670 #endif // ASSERT
1671
1672 switch (in_sig_bt[jix]) {
1673 // Due to casting, small integers should only occur in pairs with type T_LONG.
1674 case T_BOOLEAN:
1675 case T_CHAR:
1676 case T_BYTE:
1677 case T_SHORT:
1678 case T_INT:
1679 // Move int and do sign extension.
1680 move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1681 break;
1682
1683 case T_LONG :
1684 long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1685 break;
1686
1687 case T_ARRAY:
1688 case T_OBJECT:
1689 object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1690 ((jix == 0) && (!method_is_static)),
1691 &receiver_offset);
1692 break;
1693 case T_VOID:
1694 break;
1695
1696 case T_FLOAT:
1697 float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1698 break;
1699
1700 case T_DOUBLE:
1701 assert(jix+1 < total_in_args && in_sig_bt[jix+1] == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1702 double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1703 break;
1704
1705 case T_ADDRESS:
1706 assert(false, "found T_ADDRESS in java args");
1707 break;
1708
1709 default:
1710 ShouldNotReachHere();
1711 }
1712 }
1713
1714 //--------------------------------------------------------------------
1715 // Pre-load a static method's oop into ARG2.
1716 // Used both by locking code and the normal JNI call code.
1717 //--------------------------------------------------------------------
1718 if (method_is_static) {
1719 __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1720
1721 // Now handlize the static class mirror in ARG2. It's known not-null.
1722 __ z_stg(Z_ARG2, klass_offset, Z_SP);
1723 map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1724 __ add2reg(Z_ARG2, klass_offset, Z_SP);
1725 }
1726
1727 // Get JNIEnv* which is first argument to native.
1728 __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1729
1730 //////////////////////////////////////////////////////////////////////
1731 // We have all of the arguments setup at this point.
1732 // We MUST NOT touch any outgoing regs from this point on.
1733 // So if we must call out we must push a new frame.
1734 //////////////////////////////////////////////////////////////////////
1735
1736
1737 // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1738 // Both values represent the same position.
1739 __ get_PC(Z_R10); // PC into register
  wrapper_CRegsSet = __ offset();                // ... and into the variable.
1741
1742 // Z_R10 now has the pc loaded that we will use when we finally call to native.
1743
1744 // We use the same pc/oopMap repeatedly when we call out.
1745 oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1746
1747 // Lock a synchronized method.
1748
1749 if (method->is_synchronized()) {
1750
1751 // ATTENTION: args and Z_R10 must be preserved.
1752 Register r_oop = Z_R11;
1753 Register r_box = Z_R12;
1754 Register r_tmp1 = Z_R13;
1755 Register r_tmp2 = Z_R7;
1756 Label done;
1757
1758 // Load the oop for the object or class. R_carg2_classorobject contains
1759 // either the handlized oop from the incoming arguments or the handlized
1760 // class mirror (if the method is static).
1761 __ z_lg(r_oop, 0, Z_ARG2);
1762
1763 lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1764 // Get the lock box slot's address.
1765 __ add2reg(r_box, lock_offset, Z_SP);
1766
1767 // Try fastpath for locking.
    // Fast_lock kills r_tmp1, r_tmp2.
1769 __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
1770 __ z_bre(done);
1771
1772 //-------------------------------------------------------------------------
1773 // None of the above fast optimizations worked so we have to get into the
1774 // slow case of monitor enter. Inline a special case of call_VM that
1775 // disallows any pending_exception.
1776 //-------------------------------------------------------------------------
1777
1778 Register oldSP = Z_R11;
1779
1780 __ z_lgr(oldSP, Z_SP);
1781
1782 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
1783
1784 // Prepare arguments for call.
    __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
1786 __ add2reg(Z_ARG2, lock_offset, oldSP);
1787 __ z_lgr(Z_ARG3, Z_thread);
1788
1789 __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
1790
1791 // Do the call.
1792 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1793 __ call(Z_R1_scratch);
1794
1795 __ reset_last_Java_frame();
1796
1797 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
1798 #ifdef ASSERT
1799 { Label L;
1800 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1801 __ z_bre(L);
1802 __ stop("no pending exception allowed on exit from IR::monitorenter");
1803 __ bind(L);
1804 }
1805 #endif
1806 __ bind(done);
1807 } // lock for synchronized methods
1808
1809
1810 //////////////////////////////////////////////////////////////////////
1811 // Finally just about ready to make the JNI call.
1812 //////////////////////////////////////////////////////////////////////
1813
1814 // Use that pc we placed in Z_R10 a while back as the current frame anchor.
1815 __ set_last_Java_frame(Z_SP, Z_R10);
1816
1817 // Transition from _thread_in_Java to _thread_in_native.
1818 __ set_thread_state(_thread_in_native);
1819
1820 //////////////////////////////////////////////////////////////////////
1821 // This is the JNI call.
1822 //////////////////////////////////////////////////////////////////////
1823
1824 __ call_c(native_func);
1825
1826
1827 //////////////////////////////////////////////////////////////////////
1828 // We have survived the call once we reach here.
1829 //////////////////////////////////////////////////////////////////////
1830
1831
1832 //--------------------------------------------------------------------
1833 // Unpack native results.
1834 //--------------------------------------------------------------------
1835 // For int-types, we do any needed sign-extension required.
1836 // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
1837 // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
1838 // blocking or unlocking.
1839 // An OOP result (handle) is done specially in the slow-path code.
1840 //--------------------------------------------------------------------
1841 switch (ret_type) {
1842 case T_VOID: break; // Nothing to do!
1843 case T_FLOAT: break; // Got it where we want it (unless slow-path)
1844 case T_DOUBLE: break; // Got it where we want it (unless slow-path)
1845 case T_LONG: break; // Got it where we want it (unless slow-path)
1846 case T_OBJECT: break; // Really a handle.
1847 // Cannot de-handlize until after reclaiming jvm_lock.
1848 case T_ARRAY: break;
1849
1850 case T_BOOLEAN: // 0 -> false(0); !0 -> true(1)
1851 __ z_lngfr(Z_RET, Z_RET); // Force sign bit on except for zero.
1852 __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
1853 break;
1854 case T_BYTE: __ z_lgbr(Z_RET, Z_RET); break; // sign extension
1855 case T_CHAR: __ z_llghr(Z_RET, Z_RET); break; // unsigned result
1856 case T_SHORT: __ z_lghr(Z_RET, Z_RET); break; // sign extension
1857 case T_INT: __ z_lgfr(Z_RET, Z_RET); break; // sign-extend for beauty.
1858
1859 default:
1860 ShouldNotReachHere();
1861 break;
1862 }
1863
1864 // Switch thread to "native transition" state before reading the synchronization state.
1865 // This additional state is necessary because reading and testing the synchronization
1866 // state is not atomic w.r.t. GC, as this scenario demonstrates:
1867 // - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1868 // - VM thread changes sync state to synchronizing and suspends threads for GC.
1869 // - Thread A is resumed to finish this native method, but doesn't block here since it
1870 // didn't see any synchronization in progress, and escapes.
1871
1872 // Transition from _thread_in_native to _thread_in_native_trans.
1873 __ set_thread_state(_thread_in_native_trans);
1874
1875 // Safepoint synchronization
1876 //--------------------------------------------------------------------
1877 // Must we block?
1878 //--------------------------------------------------------------------
1879 // Block, if necessary, before resuming in _thread_in_Java state.
1880 // In order for GC to work, don't clear the last_Java_sp until after blocking.
1881 //--------------------------------------------------------------------
1882 {
1883 Label no_block, sync;
1884
1885 save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
1886
1887 // Force this write out before the read below.
1888 if (!UseSystemMemoryBarrier) {
1889 __ z_fence();
1890 }
1891
1892 __ safepoint_poll(sync, Z_R1);
1893
1894 __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
1895 __ z_bre(no_block);
1896
1897 // Block. Save any potential method result value before the operation and
1898 // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
1899 // lets us share the oopMap we used when we went native rather than create
1900 // a distinct one for this pc.
1901 //
1902 __ bind(sync);
1903 __ z_acquire();
1904
1905 address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
1906
1907 __ call_VM_leaf(entry_point, Z_thread);
1908
1909 __ bind(no_block);
1910 restore_native_result(masm, ret_type, workspace_slot_offset);
1911 }
1912
1913 //--------------------------------------------------------------------
1914 // Thread state is thread_in_native_trans. Any safepoint blocking has
1915 // already happened so we can now change state to _thread_in_Java.
1916 //--------------------------------------------------------------------
1917 // Transition from _thread_in_native_trans to _thread_in_Java.
1918 __ set_thread_state(_thread_in_Java);
1919
1920 //--------------------------------------------------------------------
1921 // Reguard any pages if necessary.
1922 // Protect native result from being destroyed.
1923 //--------------------------------------------------------------------
1924
1925 Label no_reguard;
1926
1927 __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
1928 StackOverflow::stack_guard_yellow_reserved_disabled);
1929
1930 __ z_bre(no_reguard);
1931
1932 save_native_result(masm, ret_type, workspace_slot_offset);
1933 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
1934 restore_native_result(masm, ret_type, workspace_slot_offset);
1935
1936 __ bind(no_reguard);
1937
1938
1939 // Synchronized methods (slow path only)
1940 // No pending exceptions for now.
1941 //--------------------------------------------------------------------
1942 // Handle possibly pending exception (will unlock if necessary).
1943 // Native result is, if any is live, in Z_FRES or Z_RES.
1944 //--------------------------------------------------------------------
1945 // Unlock
1946 //--------------------------------------------------------------------
1947 if (method->is_synchronized()) {
1948 const Register r_oop = Z_R11;
1949 const Register r_box = Z_R12;
1950 const Register r_tmp1 = Z_R13;
1951 const Register r_tmp2 = Z_R7;
1952 Label done;
1953
1954 // Get unboxed oop of class mirror or object ...
1955 int offset = method_is_static ? klass_offset : receiver_offset;
1956
1957 assert(offset != -1, "");
1958 __ z_lg(r_oop, offset, Z_SP);
1959
1960 // ... and address of lock object box.
1961 __ add2reg(r_box, lock_offset, Z_SP);
1962
1963 // Try fastpath for unlocking.
1964 // Fast_unlock kills r_tmp1, r_tmp2.
1965 __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
1966 __ z_bre(done);
1967
1968 // Slow path for unlocking.
1969 // Save and restore any potential method result value around the unlocking operation.
1970 const Register R_exc = Z_R11;
1971
1972 save_native_result(masm, ret_type, workspace_slot_offset);
1973
1974 // Must save pending exception around the slow-path VM call. Since it's a
1975 // leaf call, the pending exception (if any) can be kept in a register.
1976 __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1977 assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
1978
1979 // Must clear pending-exception before re-entering the VM. Since this is
1980 // a leaf call, pending-exception-oop can be safely kept in a register.
1981 __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
1982
1983 // Inline a special case of call_VM that disallows any pending_exception.
1984
1985 // Get locked oop from the handle we passed to jni.
1986 __ z_lg(Z_ARG1, offset, Z_SP);
1987 __ add2reg(Z_ARG2, lock_offset, Z_SP);
1988 __ z_lgr(Z_ARG3, Z_thread);
1989
1990 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1991
1992 __ call(Z_R1_scratch);
1993
1994 #ifdef ASSERT
1995 {
1996 Label L;
1997 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1998 __ z_bre(L);
1999 __ stop("no pending exception allowed on exit from IR::monitorexit");
2000 __ bind(L);
2001 }
2002 #endif
2003
2004 // Check_forward_pending_exception jump to forward_exception if any pending
2005 // exception is set. The forward_exception routine expects to see the
2006 // exception in pending_exception and not in a register. Kind of clumsy,
2007 // since all folks who branch to forward_exception must have tested
2008 // pending_exception first and hence have it in a register already.
2009 __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2010 restore_native_result(masm, ret_type, workspace_slot_offset);
2011 __ z_bru(done);
2012 __ z_illtrap(0x66);
2013
2014 __ bind(done);
2015 }
2016
2017
2018 //--------------------------------------------------------------------
2019 // Clear "last Java frame" SP and PC.
2020 //--------------------------------------------------------------------
2021
2022 __ reset_last_Java_frame();
2023
2024 // Unpack oop result, e.g. JNIHandles::resolve result.
2025 if (is_reference_type(ret_type)) {
2026 __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2027 }
2028
2029 if (CheckJNICalls) {
2030 // clear_pending_jni_exception_check
2031 __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2032 }
2033
2034 // Reset handle block.
2035 __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2036 __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset()), 4);
2037
2038 // Check for pending exceptions.
2039 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2040 __ z_brne(handle_pending_exception);
2041
2042
2043 //////////////////////////////////////////////////////////////////////
2044 // Return
2045 //////////////////////////////////////////////////////////////////////
2046
2047
2048 #ifndef USE_RESIZE_FRAME
2049 __ pop_frame(); // Pop wrapper frame.
2050 #else
2051 __ resize_frame(frame_size_in_bytes, Z_R0_scratch); // Revert stack extension.
2052 #endif
2053 __ restore_return_pc(); // This is the way back to the caller.
2054 __ z_br(Z_R14);
2055
2056
2057 //////////////////////////////////////////////////////////////////////
2058 // Out-of-line calls to the runtime.
2059 //////////////////////////////////////////////////////////////////////
2060
2061
2062 //---------------------------------------------------------------------
2063 // Handler for pending exceptions (out-of-line).
2064 //---------------------------------------------------------------------
2065 // Since this is a native call, we know the proper exception handler
2066 // is the empty function. We just pop this frame and then jump to
2067 // forward_exception_entry. Z_R14 will contain the native caller's
2068 // return PC.
2069 __ bind(handle_pending_exception);
2070 __ pop_frame();
2071 __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2072 __ restore_return_pc();
2073 __ z_br(Z_R1_scratch);
2074
2075 __ flush();
2076 //////////////////////////////////////////////////////////////////////
2077 // end of code generation
2078 //////////////////////////////////////////////////////////////////////
2079
2080
2081 nmethod *nm = nmethod::new_native_nmethod(method,
2082 compile_id,
2083 masm->code(),
2084 (int)(wrapper_VEPStart-wrapper_CodeStart),
2085 (int)(wrapper_FrameDone-wrapper_CodeStart),
2086 stack_slots / VMRegImpl::slots_per_word,
2087 (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2088 in_ByteSize(lock_offset),
2089 oop_maps);
2090
2091 return nm;
2092 }
2093
2094 static address gen_c2i_adapter(MacroAssembler *masm,
2095 int comp_args_on_stack,
2096 const GrowableArray<SigEntry>* sig,
2097 const VMRegPair *regs,
2098 Label &skip_fixup) {
2099 // Before we get into the guts of the C2I adapter, see if we should be here
2100 // at all. We've come from compiled code and are attempting to jump to the
2101 // interpreter, which means the caller made a static call to get here
2102 // (vcalls always get a compiled target if there is one). Check for a
2103 // compiled target. If there is one, we need to patch the caller's call.
2104
2105 // These two defs MUST MATCH code in gen_i2c2i_adapter!
2106 const Register ientry = Z_R11;
2107 const Register code = Z_R11;
2108
2109 address c2i_entrypoint;
2110 Label patch_callsite;
2111
2112 // Regular (verified) c2i entry point.
2113 c2i_entrypoint = __ pc();
2114
2115 // Call patching needed?
2116 __ load_and_test_long(Z_R0_scratch, method_(code));
2117 __ z_lg(ientry, method_(interpreter_entry)); // Preload interpreter entry (also if patching).
2118 __ z_brne(patch_callsite); // Patch required if code isn't null (compiled target exists).
2119
2120 __ bind(skip_fixup); // Return point from patch_callsite.
2121
2122 // Since all args are passed on the stack, total_args_passed*wordSize is the
2123 // space we need. We need ABI scratch area but we use the caller's since
2124 // it has already been allocated.
2125 int total_args_passed = sig->length();
2126 const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2127 int extraspace = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2128 Register sender_SP = Z_R10;
2129 Register value = Z_R12;
2130
2131 // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2132 // In addition, template interpreter expects initial_caller_sp in Z_R10.
2133 __ z_lgr(sender_SP, Z_SP);
2134
2135 // This should always fit in 14 bit immediate.
2136 __ resize_frame(-extraspace, Z_R0_scratch);
2137
2138 // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2139 // args. This essentially moves the callers ABI scratch area from the top to the
2140 // bottom of the arg area.
2141
2142 int st_off = extraspace - wordSize;
2143
2144 // Now write the args into the outgoing interpreter space.
2145 for (int i = 0; i < total_args_passed; i++) {
2146 BasicType bt = sig->at(i)._bt;
2147
2148 VMReg r_1 = regs[i].first();
2149 VMReg r_2 = regs[i].second();
2150 if (!r_1->is_valid()) {
2151 assert(!r_2->is_valid(), "");
2152 continue;
2153 }
2154 if (r_1->is_stack()) {
2155 // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2156 // We must account for it here.
2157 int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2158
2159 if (!r_2->is_valid()) {
2160 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2161 } else {
2162 // longs are given 2 64-bit slots in the interpreter,
2163 // but the data is passed in only 1 slot.
2164 if (bt == T_LONG || bt == T_DOUBLE) {
2165 #ifdef ASSERT
2166 __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2167 #endif
2168 st_off -= wordSize;
2169 }
2170 __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2171 }
2172 } else {
2173 if (r_1->is_Register()) {
2174 if (!r_2->is_valid()) {
2175 __ z_st(r_1->as_Register(), st_off, Z_SP);
2176 } else {
2177 // longs are given 2 64-bit slots in the interpreter, but the
2178 // data is passed in only 1 slot.
2179 if (bt == T_LONG || bt == T_DOUBLE) {
2180 #ifdef ASSERT
2181 __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2182 #endif
2183 st_off -= wordSize;
2184 }
2185 __ z_stg(r_1->as_Register(), st_off, Z_SP);
2186 }
2187 } else {
2188 assert(r_1->is_FloatRegister(), "");
2189 if (!r_2->is_valid()) {
2190 __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2191 } else {
2192 // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2193 // data is passed in only 1 slot.
2194 // One of these should get known junk...
2195 #ifdef ASSERT
2196 __ z_lzdr(Z_F1);
2197 __ z_std(Z_F1, st_off, Z_SP);
2198 #endif
2199 st_off-=wordSize;
2200 __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2201 }
2202 }
2203 }
2204 st_off -= wordSize;
2205 }
2206
2207
2208 // Jump to the interpreter just as if interpreter was doing it.
2209 __ add2reg(Z_esp, st_off, Z_SP);
2210
2211 // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2212 __ z_br(ientry);
2213
2214
2215 // Prevent illegal entry to out-of-line code.
2216 __ z_illtrap(0x22);
2217
2218 // Generate out-of-line runtime call to patch caller,
2219 // then continue as interpreted.
2220
2221 // IF you lose the race you go interpreted.
2222 // We don't see any possible endless c2i -> i2c -> c2i ...
2223 // transitions no matter how rare.
2224 __ bind(patch_callsite);
2225
2226 RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2227 __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2228 RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2229 __ z_bru(skip_fixup);
2230
2231 // end of out-of-line code
2232
2233 return c2i_entrypoint;
2234 }
2235
2236 // On entry, the following registers are set
2237 //
2238 // Z_thread r8 - JavaThread*
2239 // Z_method r9 - callee's method (method to be invoked)
2240 // Z_esp r7 - operand (or expression) stack pointer of caller. one slot above last arg.
2241 // Z_SP r15 - SP prepared by call stub such that caller's outgoing args are near top
2242 //
2243 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2244 int comp_args_on_stack,
2245 const GrowableArray<SigEntry>* sig,
2246 const VMRegPair *regs) {
2247 const Register value = Z_R12;
2248 const Register ld_ptr= Z_esp;
2249 int total_args_passed = sig->length();
2250
2251 int ld_offset = total_args_passed * wordSize;
2252
2253 // Cut-out for having no stack args.
2254 if (comp_args_on_stack) {
2255 // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2256 // registers are below. By subtracting stack0, we either get a negative
2257 // number (all values in registers) or the maximum stack slot accessed.
2258 // Convert VMRegImpl (4 byte) stack slots to words.
2259 int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2260 // Round up to miminum stack alignment, in wordSize
2261 comp_words_on_stack = align_up(comp_words_on_stack, 2);
2262
2263 __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2264 }
2265
2266 // Now generate the shuffle code. Pick up all register args and move the
2267 // rest through register value=Z_R12.
2268 for (int i = 0; i < total_args_passed; i++) {
2269 BasicType bt = sig->at(i)._bt;
2270 if (bt == T_VOID) {
2271 assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
2272 continue;
2273 }
2274
2275 // Pick up 0, 1 or 2 words from ld_ptr.
2276 assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2277 "scrambled load targets?");
2278 VMReg r_1 = regs[i].first();
2279 VMReg r_2 = regs[i].second();
2280 if (!r_1->is_valid()) {
2281 assert(!r_2->is_valid(), "");
2282 continue;
2283 }
2284 if (r_1->is_FloatRegister()) {
2285 if (!r_2->is_valid()) {
2286 __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2287 ld_offset-=wordSize;
2288 } else {
2289 // Skip the unused interpreter slot.
2290 __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2291 ld_offset -= 2 * wordSize;
2292 }
2293 } else {
2294 if (r_1->is_stack()) {
2295 // Must do a memory to memory move.
2296 int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2297
2298 if (!r_2->is_valid()) {
2299 __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2300 } else {
2301 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2302 // data is passed in only 1 slot.
2303 if (bt == T_LONG || bt == T_DOUBLE) {
2304 ld_offset -= wordSize;
2305 }
2306 __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2307 }
2308 } else {
2309 if (!r_2->is_valid()) {
2310 // Not sure we need to do this but it shouldn't hurt.
2311 if (is_reference_type(bt) || bt == T_ADDRESS) {
2312 __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2313 } else {
2314 __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2315 }
2316 } else {
2317 // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2318 // data is passed in only 1 slot.
2319 if (bt == T_LONG || bt == T_DOUBLE) {
2320 ld_offset -= wordSize;
2321 }
2322 __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2323 }
2324 }
2325 ld_offset -= wordSize;
2326 }
2327 }
2328
2329 // Jump to the compiled code just as if compiled code was doing it.
2330 // load target address from method:
2331 __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2332
2333 // Store method into thread->callee_target.
2334 // 6243940: We might end up in handle_wrong_method if
2335 // the callee is deoptimized as we race thru here. If that
2336 // happens we don't want to take a safepoint because the
2337 // caller frame will look interpreted and arguments are now
2338 // "compiled" so it is much better to make this transition
2339 // invisible to the stack walking code. Unfortunately, if
2340 // we try and find the callee by normal means a safepoint
2341 // is possible. So we stash the desired callee in the thread
2342 // and the vm will find it there should this case occur.
2343 __ z_stg(Z_method, thread_(callee_target));
2344
2345 __ z_br(Z_R1_scratch);
2346 }
2347
// Generate the full set of adapter entry points for one signature:
//   - I2C entry (interpreted caller -> compiled callee),
//   - C2I unverified entry (inline-cache check, then fall through),
//   - C2I entry (with class-initialization barrier for static methods),
//   - C2I no-clinit-check entry (barrier already satisfied).
// The *_cc / *_cc_ro signature/register variants and new_adapter /
// allocate_code_blob are not used by this implementation; they exist to
// match the shared signature. All entries are recorded in entry_address[].
void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                            int comp_args_on_stack,
                                            const GrowableArray<SigEntry>* sig,
                                            const VMRegPair* regs,
                                            const GrowableArray<SigEntry>* sig_cc,
                                            const VMRegPair* regs_cc,
                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                            const VMRegPair* regs_cc_ro,
                                            address entry_address[AdapterBlob::ENTRY_COUNT],
                                            AdapterBlob*& new_adapter,
                                            bool allocate_code_blob) {
  __ align(CodeEntryAlignment);
  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  Label skip_fixup;
  {
    Label ic_miss;

    // Out-of-line call to ic_miss handler.
    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);

    // Unverified Entry Point UEP
    __ align(CodeEntryAlignment);
    entry_address[AdapterBlob::C2I_Unverified] = __ pc();

    // Inline-cache check; on a hit, load the real callee from the IC data.
    __ ic_check(2);
    __ z_lg(Z_method, Address(Z_inline_cache, CompiledICData::speculated_method_offset()));
    // This def MUST MATCH code in gen_c2i_adapter!
    const Register code = Z_R11;

    __ load_and_test_long(Z_R0, method_(code));
    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.

    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
  }

  entry_address[AdapterBlob::C2I] = __ pc();

  // Class initialization barrier for static methods
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  Label L_skip_barrier;

  // Bypass the barrier for non-static methods
  __ testbit_ushort(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
  __ z_bfalse(L_skip_barrier); // non-static

  Register klass = Z_R11;
  __ load_method_holder(klass, Z_method);
  __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);

  // Holder class not yet initialized (by this thread): bail out to the
  // handle_wrong_method stub, which will re-resolve and retry.
  __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
  __ z_br(klass);

  __ bind(L_skip_barrier);
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();

  gen_c2i_adapter(masm, comp_args_on_stack, sig, regs, skip_fixup);
  return;
}
2409
2410 // This function returns the adjust size (in number of words) to a c2i adapter
2411 // activation for use during deoptimization.
2412 //
2413 // Actually only compiled frames need to be adjusted, but it
2414 // doesn't harm to adjust entry and interpreter frames, too.
2415 //
2416 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2417 assert(callee_locals >= callee_parameters,
2418 "test and remove; got more parms than locals");
2419 // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2420 return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2421 frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2422 }
2423
2424 uint SharedRuntime::in_preserve_stack_slots() {
2425 return frame::jit_in_preserve_size_in_4_byte_units;
2426 }
2427
2428 uint SharedRuntime::out_preserve_stack_slots() {
2429 return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2430 }
2431
// Register holding the current JavaThread*, as a VMReg.
// Not implemented on this platform: Unimplemented() aborts the VM if
// this is ever reached; the nullptr return is unreachable.
VMReg SharedRuntime::thread_register() {
  Unimplemented();
  return nullptr;
}
2436
2437 //
2438 // Frame generation for deopt and uncommon trap blobs.
2439 //
// Push one skeletal interpreter frame during deoptimization.
// Reads the frame's return pc and frame size from the current positions
// of pcs_reg and frame_sizes_reg (the UnrollBlock arrays; the registers
// themselves are left unchanged), stores the pc into the current frame's
// ABI return-pc slot, and pushes a new frame of the given size.
// frame_size_reg and pc_reg are clobbered.
static void push_skeleton_frame(MacroAssembler* masm,
                                /* Unchanged */
                                Register frame_sizes_reg,
                                Register pcs_reg,
                                /* Invalidate */
                                Register frame_size_reg,
                                Register pc_reg) {
  BLOCK_COMMENT(" push_skeleton_frame {");
  // Fetch this frame's return pc and size from the unroll arrays.
  __ z_lg(pc_reg, 0, pcs_reg);
  __ z_lg(frame_size_reg, 0, frame_sizes_reg);
  // Plant the return pc in the (still current) caller frame before pushing.
  __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
  // pc_reg's value has been consumed; push_frame reuses it as the new
  // frame pointer (old SP) for the stores below.
  Register fp = pc_reg;
  __ push_frame(frame_size_reg, fp);
#ifdef ASSERT
  // The magic is required for successful walking skeletal frames.
  __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
  __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
  // Fill other slots that are supposedly not necessary with eye catchers.
  __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
  __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
  // The sender_sp of the bottom frame is set before pushing it.
  // The sender_sp of non bottom frames is their caller's top_frame_sp, which
  // is unknown here. Luckily it is not needed before filling the frame in
  // layout_activation(), we assert this by setting an eye catcher (see
  // comments on sender_sp in frame_s390.hpp).
  __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
#endif // ASSERT
  BLOCK_COMMENT(" } push_skeleton_frame");
}
2469
// Loop through the UnrollBlock info and create new frames.
// Extends the caller-of-deoptee's frame by caller_adjustment bytes, then
// pushes one skeletal interpreter frame per entry of the UnrollBlock's
// frame_sizes/frame_pcs arrays, and finally stores the top frame's return pc.
// unroll_block_reg is read only; all other register arguments are clobbered.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                        /* read */
                        Register unroll_block_reg,
                        /* invalidate */
                        Register frame_sizes_reg,
                        Register number_of_frames_reg,
                        Register pcs_reg,
                        Register tmp1,
                        Register tmp2) {
  BLOCK_COMMENT("push_skeleton_frames {");
  // _number_of_frames is of type int (deoptimization.hpp).
  __ z_lgf(number_of_frames_reg,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset()));
  __ z_lg(pcs_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset()));
  __ z_lg(frame_sizes_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset()));

  // stack: (caller_of_deoptee, ...).

  // If caller_of_deoptee is a compiled frame, then we extend it to make
  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
  // See also Deoptimization::last_frame_adjust() above.
  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.

  __ z_lgf(Z_R1_scratch,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset()));
  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
  // The oldest skeletal frame requires a valid sender_sp to make it walkable
  // (it is required to find the original pc of caller_of_deoptee if it is marked
  // for deoptimization - see nmethod::orig_pc_addr()).
  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);

  // Now push the new interpreter frames.
  Label loop, loop_entry;

  // Make sure that there is at least one entry in the array.
  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
  __ asm_assert(Assembler::bcondNotZero, "array_size must be > 0", 0x205);

  __ z_bru(loop_entry);

  __ bind(loop);

  // Advance to the next frame_size/pc pair (8 bytes per array element).
  __ add2reg(frame_sizes_reg, wordSize);
  __ add2reg(pcs_reg, wordSize);

  __ bind(loop_entry);

  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);

  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
  __ z_brne(loop);

  // Set the top frame's return pc.
  __ add2reg(pcs_reg, wordSize);
  __ z_lg(Z_R0_scratch, 0, pcs_reg);
  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
  BLOCK_COMMENT("} push_skeleton_frames");
}
2533
//------------------------------generate_deopt_blob----------------------------
// Generate the blob that all deoptimizing returns funnel through. Three entry
// points (normal deopt, reexecute, exception) save the live registers, call
// Deoptimization::fetch_unroll_info, replace the deoptee's frame by skeletal
// interpreter frames, let Deoptimization::unpack_frames fill them in, and
// finally continue execution in the interpreter.
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
  CodeBuffer buffer(name, 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = nullptr;
  OopMapSet *oop_maps = new OopMapSet();

  unsigned int start_off = __ offset();
  Label cont;

  // --------------------------------------------------------------------------
  // Normal entry (non-exception case)
  //
  // We have been called from the deopt handler of the deoptee.
  // Z_R14 points to the entry point of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.

  const Register exec_mode_reg = Z_tmp_1;

  // stack: (deoptee, caller of deoptee, ...)

  // pushes an "unpack" frame
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // save R14 into the deoptee frame. the `fetch_unroll_info'
  // procedure called below will read it from there.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // note the entry point.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
  __ z_bru(exec_mode_initialized);

#ifndef COMPILER1
  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
#else
  // --------------------------------------------------------------------------
  // Reexecute entry
  // - Z_R14 = Deopt Handler in nmethod

  int reexecute_offset = __ offset() - start_off;

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
  __ z_bru(exec_mode_initialized);
#endif


  // --------------------------------------------------------------------------
  // Exception entry. We reached here via a branch. Registers on entry:
  // - Z_EXC_OOP (Z_ARG1) = exception oop
  // - Z_EXC_PC  (Z_ARG2) = the exception pc.

  int exception_offset = __ offset() - start_off;

  // all registers are dead at this entry point, except for Z_EXC_OOP, and
  // Z_EXC_PC which contain the exception oop and exception pc
  // respectively. Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  // Store exception oop and pc in thread (location known to GC).
  // Need this since the call to "fetch_unroll_info()" may safepoint.
  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));

  // fall through

  int exception_in_tls_offset = __ offset() - start_off;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread

  // load throwing pc from JavaThread and use it as the return address of the current frame.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));

  // Save everything in sight.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);

  // Now it is safe to overwrite any register

  // Clear the exception pc field in JavaThread
  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);

  // Deopt during an exception. Save exec mode for unpack_frames.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);


#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);

  // verify that there is no pending exception
  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
                             "must not have pending exception here", __LINE__);
#endif

  // --------------------------------------------------------------------------
  // At this point, the live registers are saved and
  // the exec_mode_reg has been set up correctly.
  __ bind(exec_mode_initialized);

  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).

  const Register unroll_block_reg = Z_tmp_2;

  // we need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. however we can't block and no gc will
  // occur so we don't need an oopmap. the value of the pc in the
  // frame is not particularly important. it just needs to identify the blob.

  // Don't set last_Java_pc anymore here (is implicitly null then).
  // the correct PC is retrieved in pd_last_frame() in that case.
  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint
  // despite it's marked as "leaf call"!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site this describes all our saved volatile registers
  int oop_map_offs = __ offset();
  oop_maps->add_gc_map(oop_map_offs, map);

  __ reset_last_Java_frame();
  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);
  // restore the return registers that have been saved
  // (among other registers) by save_live_registers(...).
  RegisterSaver::restore_result_registers(masm);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset()));

  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  NearLabel skip_restore_excp;
  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
  __ z_lg(Z_RET, thread_(exception_oop));
  __ clear_mem(thread_(exception_oop), 8);
  __ bind(skip_restore_excp);

  // remove the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4,
                  Z_ARG3);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, caller of deoptee, ...).

  // push an "unpack" frame taking care of float / int return values.
  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, caller of deoptee, ...).

  // spill live volatile registers since we'll do a call.
  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // let the unpacker layout information in the skeletal frames just allocated.
  // Reuse the pc recorded for the fetch_unroll_info oopmap above.
  __ get_PC(Z_RET, oop_map_offs - __ offset());
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);

  __ reset_last_Java_frame();

  // restore the volatiles saved above.
  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // pop the "unpack" frame.
  __ pop_frame();
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point.
  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
2750
2751
2752 #ifdef COMPILER2
2753 //------------------------------generate_uncommon_trap_blob--------------------
//------------------------------generate_uncommon_trap_blob--------------------
// Generate the blob C2 jumps to for an uncommon trap: calls
// Deoptimization::uncommon_trap to build the UnrollBlock, replaces the
// deoptee's frame by skeletal interpreter frames, lets unpack_frames fill
// them in, and continues in the interpreter. Returns nullptr on OOM.
UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
  CodeBuffer buffer(name, 2048, 1024);
  if (buffer.blob() == nullptr) {
    return nullptr;
  }
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);

  Register unroll_block_reg = Z_tmp_1;
  Register klass_index_reg  = Z_ARG2;
  Register unc_trap_reg     = Z_ARG2;

  // stack: (deoptee, caller_of_deoptee, ...).

  // push a dummy "unpack" frame and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // save R14 to compiled frame.
  __ save_return_pc();
  // push the "unpack_frame".
  __ push_frame_abi160(0);

  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).

  // set the "unpack" frame as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  // On entry Z_ARG1 holds the klass index; move it into the C ABI arg slot.
  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);

  __ reset_last_Java_frame();

  // pop the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller_of_deoptee, ...).

  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);

  // pop the deoptee frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
  // Byte offset of the int-sized unpack_kind field within the UnrollBlock
  // (CLI/CLIY compare a single byte, so pick the least significant one).
  const int unpack_kind_byte_offset = in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset())
#ifndef VM_LITTLE_ENDIAN
  + 3
#endif
  ;
  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  } else {
    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  }
  __ asm_assert(Assembler::bcondEqual, "OptoRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
#endif

  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);

  // allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters !)
  push_skeleton_frames(masm, false/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_2,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, (resized) caller of deoptee, ...).

  // push a dummy "unpack" frame taking care of float return values.
  // call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created

  // push the "unpack" frame
  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, (resized) caller of deoptee, ...).

  // set the "unpack" frame as last_Java_frame
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  // indicate it is the uncommon trap case
  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // let the unpacker layout information in the skeletal frames just allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);

  __ reset_last_Java_frame();
  // pop the "unpack" frame
  __ pop_frame();
  // restore LR from top interpreter frame
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // (resized) caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point
  __ z_br(Z_R14);

  masm->flush();
  return UncommonTrapBlob::create(&buffer, nullptr, framesize_in_bytes/wordSize);
}
2879 #endif // COMPILER2
2880
2881
//------------------------------generate_handler_blob------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and setup oopmap.
// Used for the safepoint-polling handlers: `call_ptr` is the runtime entry
// invoked when a poll fires; `id` selects return-poll vs loop-poll vs
// vector-saving variants.
SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
  assert(StubRoutines::forward_exception_entry() != nullptr,
         "must be generated before");
  assert(is_polling_page_id(id), "expected a polling page stub id");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  unsigned int start_off = __ offset();
  address call_pc = nullptr;
  int frame_size_in_bytes;

  bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
  // Make room for return address (or push it again)
  if (!cause_return) {
    // The poll was not at a return: reload the pc the thread was stopped at.
    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
  }

  bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
  // Save registers, fpu state, and flags
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors);

  if (!cause_return) {
    // Keep a copy of the return pc to detect if it gets modified.
    __ z_lgr(Z_R6, Z_R14);
  }

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(Z_SP, noreg);

  // call into the runtime to handle the safepoint poll
  __ call_VM_leaf(call_ptr, Z_thread);


  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map((int)(__ offset()-start_off), map);

  Label noException;

  __ reset_last_Java_frame();

  __ load_and_test_long(Z_R1, thread_(pending_exception));
  __ z_bre(noException);

  // Pending exception case, used (sporadically) by
  // api/java_lang/Thread.State/index#ThreadState et al.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  // Jump to forward_exception_entry, with the issuing PC in Z_R14
  // so it looks like the original nmethod called forward_exception_entry.
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // No exception case
  __ bind(noException);

  if (!cause_return) {
    Label no_adjust;
    // If our stashed return pc was modified by the runtime we avoid touching it
    const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors);
    __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
    __ z_brne(no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    __ instr_size(Z_R1_scratch, Z_R6);
    __ z_agr(Z_R6, Z_R1_scratch);
    __ z_stg(Z_R6, offset_of_return_pc, Z_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize);
}
2980
2981
2982 //
2983 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
2984 //
2985 // Generate a stub that calls into vm to find out the proper destination
2986 // of a Java call. All the argument registers are live at this point
2987 // but since this is generic code we don't know what they are and the caller
2988 // must do any gc of the args.
2989 //
2990 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
2991 assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
2992 assert(is_resolve_id(id), "expected a resolve stub id");
2993
2994 // allocate space for the code
2995 ResourceMark rm;
2996
2997 const char* name = SharedRuntime::stub_name(id);
2998 CodeBuffer buffer(name, 1000, 512);
2999 MacroAssembler* masm = new MacroAssembler(&buffer);
3000
3001 OopMapSet *oop_maps = new OopMapSet();
3002 OopMap* map = nullptr;
3003
3004 unsigned int start_off = __ offset();
3005
3006 map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
3007
3008 // We must save a PC from within the stub as return PC
3009 // C code doesn't store the LR where we expect the PC,
3010 // so we would run into trouble upon stack walking.
3011 __ get_PC(Z_R1_scratch);
3012
3013 unsigned int frame_complete = __ offset();
3014
3015 __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
3016
3017 __ call_VM_leaf(destination, Z_thread, Z_method);
3018
3019
3020 // Set an oopmap for the call site.
3021 // We need this not only for callee-saved registers, but also for volatile
3022 // registers that the compiler might be keeping live across a safepoint.
3023
3024 oop_maps->add_gc_map((int)(frame_complete-start_off), map);
3025
3026 // clear last_Java_sp
3027 __ reset_last_Java_frame();
3028
3029 // check for pending exceptions
3030 Label pending;
3031 __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3032 __ z_brne(pending);
3033
3034 __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3035 RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3036
3037 // get the returned method
3038 __ get_vm_result_metadata(Z_method);
3039
3040 // We are back to the original state on entry and ready to go.
3041 __ z_br(Z_R1_scratch);
3042
3043 // Pending exception after the safepoint
3044
3045 __ bind(pending);
3046
3047 RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3048
3049 // exception pending => remove activation and forward to exception handler
3050
3051 __ z_lgr(Z_R2, Z_R0); // pending_exception
3052 __ clear_mem(Address(Z_thread, JavaThread::vm_result_oop_offset()), sizeof(jlong));
3053 __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3054 __ z_br(Z_R1_scratch);
3055
3056 // -------------
3057 // make sure all code is generated
3058 masm->flush();
3059
3060 // return the blob
3061 // frame_size_words or bytes??
3062 return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3063 oop_maps, true);
3064
3065 }
3066
3067 // Continuation point for throwing of implicit exceptions that are
3068 // not handled in the current activation. Fabricates an exception
3069 // oop and initiates normal exception dispatching in this
3070 // frame. Only callee-saved registers are preserved (through the
3071 // normal RegisterMap handling). If the compiler
3072 // needs all registers to be preserved between the fault point and
3073 // the exception handler then it must assume responsibility for that
3074 // in AbstractCompiler::continuation_for_implicit_null_exception or
3075 // continuation_for_implicit_division_by_zero_exception. All other
3076 // implicit exceptions (e.g., NullPointerException or
3077 // AbstractMethodError on entry) are either at call sites or
3078 // otherwise assume that stack unwinding will be initiated, so
3079 // caller saved registers were assumed volatile in the compiler.
3080
3081 // Note that we generate only this stub into a RuntimeStub, because
3082 // it needs to be properly traversed and ignored during GC, so we
3083 // change the meaning of the "__" macro within this method.
3084
3085 // Note: the routine set_pc_not_at_call_for_caller in
3086 // SharedRuntime.cpp requires that this code be generated into a
3087 // RuntimeStub.
3088
// Generate a RuntimeStub that calls `runtime_entry` (which is expected to
// install a pending exception in the current thread) and then forwards to
// StubRoutines::forward_exception_entry() for normal exception dispatching.
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  int insts_size = 256;
  int locs_size  = 0;

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer code(name, insts_size, locs_size);
  MacroAssembler* masm = new MacroAssembler(&code);
  int framesize_in_bytes;
  address start = __ pc();

  __ save_return_pc();
  framesize_in_bytes = __ push_frame_abi160(0);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of stack at this point.
  __ get_PC(Z_R1);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

  // Do the call.
  BLOCK_COMMENT("call runtime_entry");
  __ call_VM_leaf(runtime_entry, Z_thread);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending exception.
  { Label L;
    __ z_lg(Z_R0,
            in_bytes(Thread::pending_exception_offset()),
            Z_thread);
    __ z_ltgr(Z_R0, Z_R0);
    __ z_brne(L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  __ pop_frame();
  __ restore_return_pc();

  __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1);

  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  frame_complete_pc - start,
                                  framesize_in_bytes/wordSize,
                                  nullptr /*oop_maps*/, false);

  return stub;
}
3148
3149 //------------------------------Montgomery multiplication------------------------
3150 //
3151
// Subtract 0:b from carry:a. Return carry.
// a and b are arrays of `len` 64-bit words (least significant first);
// a is updated in place with a - b, and the resulting borrow is folded
// into the returned carry. Uses s390 subtract-with-borrow (SLBG/SLBGR).
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  // c starts as the byte offset of the last word; reused as scratch in the asm.
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
    "LGHI   0, 8               \n" // index increment (for BRXLG)
    "LGR    1, %[c]            \n" // index limit (for BRXLG)
    "0:                        \n"
    "LG     %[c], 0(%[i],%[a]) \n"
    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG    %[c], 0(%[i],%[a]) \n"
    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
    "SLBGR  %[c], %[c]         \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1"
  );
  // c is 0 or -1 (all ones) here, so this folds the borrow into the carry.
  return carry + c;
}
3172
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
// Computes (T2:T1:T0) += A[A_ind] * B[B_ind] using the 128-bit MLG
// product in register pair r0:r1 and add-with-carry (ALCGR) to ripple
// carries up through the triple-precision accumulator.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  // Byte offsets of the requested elements (8 bytes per word).
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n" // add low product word
    "LGHI   1, 0               \n" // r1 = 0
    "ALCGR  %[T1], 0           \n" // add high product word + carry
    "ALCGR  %[T2], 1           \n" // propagate final carry
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1"
  );
}
3192
// As above, but add twice the double-length result into the
// accumulator: (T2:T1:T0) += 2 * A[A_ind] * B[B_ind]. The product is
// added in twice rather than shifted, so both additions share the
// carry-rippling ALCGR sequence.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  const unsigned long zero = 0;
  // Byte offsets of the requested elements (8 bytes per word).
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n" // first addition of the product
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    "ALGR   %[T0], 1           \n" // second addition of the product
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1"
  );
}
3215
3216 // Fast Montgomery multiplication. The derivation of the algorithm is
3217 // in "A Cryptographic Library for the Motorola DSP56000,
3218 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3219 static void
3220 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3221 unsigned long m[], unsigned long inv, int len) {
3222 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3223 int i;
3224
3225 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3226
3227 for (i = 0; i < len; i++) {
3228 int j;
3229 for (j = 0; j < i; j++) {
3230 MACC(a, j, b, i-j, t0, t1, t2);
3231 MACC(m, j, n, i-j, t0, t1, t2);
3232 }
3233 MACC(a, i, b, 0, t0, t1, t2);
3234 m[i] = t0 * inv;
3235 MACC(m, i, n, 0, t0, t1, t2);
3236
3237 assert(t0 == 0, "broken Montgomery multiply");
3238
3239 t0 = t1; t1 = t2; t2 = 0;
3240 }
3241
3242 for (i = len; i < 2 * len; i++) {
3243 int j;
3244 for (j = i - len + 1; j < len; j++) {
3245 MACC(a, j, b, i-j, t0, t1, t2);
3246 MACC(m, j, n, i-j, t0, t1, t2);
3247 }
3248 m[i-len] = t0;
3249 t0 = t1; t1 = t2; t2 = 0;
3250 }
3251
3252 while (t0) {
3253 t0 = sub(m, n, t0, len);
3254 }
3255 }
3256
3257 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3258 // multiplies so it should be up to 25% faster than Montgomery
3259 // multiplication. However, its loop control is more complex and it
3260 // may actually run slower on some machines.
3261 static void
3262 montgomery_square(unsigned long a[], unsigned long n[],
3263 unsigned long m[], unsigned long inv, int len) {
3264 unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3265 int i;
3266
3267 assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3268
3269 for (i = 0; i < len; i++) {
3270 int j;
3271 int end = (i+1)/2;
3272 for (j = 0; j < end; j++) {
3273 MACC2(a, j, a, i-j, t0, t1, t2);
3274 MACC(m, j, n, i-j, t0, t1, t2);
3275 }
3276 if ((i & 1) == 0) {
3277 MACC(a, j, a, j, t0, t1, t2);
3278 }
3279 for (; j < i; j++) {
3280 MACC(m, j, n, i-j, t0, t1, t2);
3281 }
3282 m[i] = t0 * inv;
3283 MACC(m, i, n, 0, t0, t1, t2);
3284
3285 assert(t0 == 0, "broken Montgomery square");
3286
3287 t0 = t1; t1 = t2; t2 = 0;
3288 }
3289
3290 for (i = len; i < 2*len; i++) {
3291 int start = i-len+1;
3292 int end = start + (len - start)/2;
3293 int j;
3294 for (j = start; j < end; j++) {
3295 MACC2(a, j, a, i-j, t0, t1, t2);
3296 MACC(m, j, n, i-j, t0, t1, t2);
3297 }
3298 if ((i & 1) == 0) {
3299 MACC(a, j, a, j, t0, t1, t2);
3300 }
3301 for (; j < len; j++) {
3302 MACC(m, j, n, i-j, t0, t1, t2);
3303 }
3304 m[i-len] = t0;
3305 t0 = t1; t1 = t2; t2 = 0;
3306 }
3307
3308 while (t0) {
3309 t0 = sub(m, n, t0, len);
3310 }
3311 }
3312
3313 // The threshold at which squaring is advantageous was determined
3314 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3315 // Value seems to be ok for other platforms, too.
3316 #define MONTGOMERY_SQUARING_THRESHOLD 64
3317
// Copy len longwords from s to d with the destination array reversed,
// word-swapping each longword as we go on little-endian machines.
// (This big-endian s390 build performs no swap; the little-endian
// path is not implemented.)
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int i = 0; i < len; i++) {
    unsigned long s_val = s[i];
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
    Unimplemented();
#endif
    d[len - 1 - i] = s_val;
  }
}
3333
3334 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3335 jint len, jlong inv,
3336 jint *m_ints) {
3337 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3338 assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3339 int longwords = len/2;
3340
3341 // Make very sure we don't use so much space that the stack might
3342 // overflow. 512 jints corresponds to an 16384-bit integer and
3343 // will use here a total of 8k bytes of stack space.
3344 int divisor = sizeof(unsigned long) * 4;
3345 guarantee(longwords <= 8192 / divisor, "must be");
3346 int total_allocation = longwords * sizeof (unsigned long) * 4;
3347 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3348
3349 // Local scratch arrays
3350 unsigned long
3351 *a = scratch + 0 * longwords,
3352 *b = scratch + 1 * longwords,
3353 *n = scratch + 2 * longwords,
3354 *m = scratch + 3 * longwords;
3355
3356 reverse_words((unsigned long *)a_ints, a, longwords);
3357 reverse_words((unsigned long *)b_ints, b, longwords);
3358 reverse_words((unsigned long *)n_ints, n, longwords);
3359
3360 ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3361
3362 reverse_words(m, (unsigned long *)m_ints, longwords);
3363 }
3364
3365 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3366 jint len, jlong inv,
3367 jint *m_ints) {
3368 len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3369 assert(len % 2 == 0, "array length in montgomery_square must be even");
3370 int longwords = len/2;
3371
3372 // Make very sure we don't use so much space that the stack might
3373 // overflow. 512 jints corresponds to an 16384-bit integer and
3374 // will use here a total of 6k bytes of stack space.
3375 int divisor = sizeof(unsigned long) * 3;
3376 guarantee(longwords <= (8192 / divisor), "must be");
3377 int total_allocation = longwords * sizeof (unsigned long) * 3;
3378 unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3379
3380 // Local scratch arrays
3381 unsigned long
3382 *a = scratch + 0 * longwords,
3383 *n = scratch + 1 * longwords,
3384 *m = scratch + 2 * longwords;
3385
3386 reverse_words((unsigned long *)a_ints, a, longwords);
3387 reverse_words((unsigned long *)n_ints, n, longwords);
3388
3389 if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3390 ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3391 } else {
3392 ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3393 }
3394
3395 reverse_words(m, (unsigned long *)m_ints, longwords);
3396 }
3397
// Spin-wait pause hook. No platform-specific pause/backoff hint is
// implemented here; always returns 0. (Presumably callers treat the
// return value as the number of pause cycles performed — confirm
// against the shared spin-lock code.)
extern "C"
int SpinPause() {
  return 0;
}
3402
3403 #if INCLUDE_JFR
3404 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3405 if (!Continuations::enabled()) return nullptr;
3406 Unimplemented();
3407 return nullptr;
3408 }
3409
3410 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3411 if (!Continuations::enabled()) return nullptr;
3412 Unimplemented();
3413 return nullptr;
3414 }
3415
3416 #endif // INCLUDE_JFR
3417
// Upper bounds for multi-register Java value returns, taken directly
// from the Java-call register parameter counts declared in Argument.
const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
3420
// Multi-register Java return convention. Not implemented on this
// platform: any call hits Unimplemented(); the trailing return 0 only
// satisfies the compiler.
int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
  Unimplemented();
  return 0;
}
3425
// Adapter blob for buffered inline types (value-class support —
// presumably the Valhalla calling convention; verify against the
// shared code). Not implemented on this platform: any call hits
// Unimplemented(); the trailing nullptr only satisfies the compiler.
BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  Unimplemented();
  return nullptr;
}