/*
 * Copyright (c) 2016, 2025, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2024 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.inline.hpp"
#include "registerSaver_s390.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/universe.hpp"
#include "nativeInst_s390.hpp"
#include "oops/instanceOop.hpp"
#include "oops/objArrayKlass.hpp"
#include "oops/oop.inline.hpp"
#include "prims/methodHandles.hpp"
#include "prims/upcallLinker.hpp"
#include "runtime/frame.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

// Declaration and definition of StubGenerator (no .hpp file).
// For a more detailed description of the stub routine structure
// see the comment in stubRoutines.hpp.

#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE), _masm) : _masm)->
#endif

#define BLOCK_COMMENT(str) if (PrintAssembly || PrintStubCode) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")


// These static, partially const, variables are for the AES intrinsics.
// They are declared/initialized here to make them available across function bodies.

static const int AES_parmBlk_align   = 32;                 // octoword alignment.
static const int AES_stackSpace_incr = AES_parmBlk_align;  // additional stack space is allocated in such increments.
                                                           // Must be multiple of AES_parmBlk_align.

static int AES_ctrVal_len  = 0;  // ctr init value len (in bytes), expected: length of dataBlk (16)
static int AES_ctrVec_len  = 0;  // # of ctr vector elements. That many blocks can be ciphered with one instruction execution
static int AES_ctrArea_len = 0;  // reserved stack space (in bytes) for ctr (= ctrVal_len * ctrVec_len)

static int AES_parmBlk_addspace = 0;  // Must be multiple of AES_parmBlk_align.
                                      // Will be set by stub generator to stub specific value.
static int AES_dataBlk_space    = 0;  // Must be multiple of AES_parmBlk_align.
                                      // Will be set by stub generator to stub specific value.
static int AES_dataBlk_offset   = 0;  // offset of the local src and dst dataBlk buffers
                                      // Will be set by stub generator to stub specific value.

// These offsets are relative to the parameter block address (Register parmBlk = Z_R1)
static const int keylen_offset     =  -1;
static const int fCode_offset      =  -2;
static const int ctrVal_len_offset =  -4;
static const int msglen_offset     =  -8;
static const int unextSP_offset    = -16;
static const int rem_msgblk_offset = -20;
static const int argsave_offset    = -2*AES_parmBlk_align;
static const int regsave_offset    = -4*AES_parmBlk_align;               // save space for work regs (Z_R10..13)
static const int msglen_red_offset = regsave_offset + AES_parmBlk_align; // reduced len after preLoop;
static const int counter_offset    = msglen_red_offset + 8;              // current counter vector position.
static const int localSpill_offset = argsave_offset + 24;                // arg2..arg4 are saved


// -----------------------------------------------------------------------
// Stub Code definitions

class StubGenerator: public StubCodeGenerator {
 private:

  //----------------------------------------------------------------------
  // Call stubs are used to call Java from C.

  //
  // Arguments:
  //
  //   R2       - call wrapper address     : address
  //   R3       - result                   : intptr_t*
  //   R4       - result type              : BasicType
  //   R5       - method                   : method
  //   R6       - frame mgr entry point    : address
  //   [SP+160] - parameter block          : intptr_t*
  //   [SP+172] - parameter count in words : int
  //   [SP+176] - thread                   : Thread*
  //
  address generate_call_stub(address& return_address) {
    // Set up a new C frame, copy Java arguments, call frame manager
    // or native_entry, and process result.

    StubGenStubId stub_id = StubGenStubId::call_stub_id;
    StubCodeMark mark(this, stub_id);
    address start = __ pc();

    Register r_arg_call_wrapper_addr = Z_ARG1;
    Register r_arg_result_addr       = Z_ARG2;
    Register r_arg_result_type       = Z_ARG3;
    Register r_arg_method            = Z_ARG4;
    Register r_arg_entry             = Z_ARG5;

    // offsets to fp
    #define d_arg_thread 176
    #define d_arg_argument_addr 160
    #define d_arg_argument_count 168+4

    Register r_entryframe_fp         = Z_tmp_1;
    Register r_top_of_arguments_addr = Z_ARG4;
    Register r_new_arg_entry         = Z_R14;

    // macros for frame offsets
    #define call_wrapper_address_offset \
              _z_entry_frame_locals_neg(call_wrapper_address)
    #define result_address_offset \
              _z_entry_frame_locals_neg(result_address)
    #define result_type_offset \
              _z_entry_frame_locals_neg(result_type)
    #define arguments_tos_address_offset \
              _z_entry_frame_locals_neg(arguments_tos_address)

    {
      //
      // STACK on entry to call_stub:
      //
      //     F1      [C_FRAME]
      //             ...
      //

      Register r_argument_addr          = Z_tmp_3;
      Register r_argumentcopy_addr      = Z_tmp_4;
      Register r_argument_size_in_bytes = Z_ARG5;
      Register r_frame_size             = Z_R1;

      Label arguments_copied;

      // Save non-volatile registers to ABI of caller frame.
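      // (Note: the stores below stash the nonvolatile GPRs Z_R6..Z_R14 and FPRs
      //  Z_F8..Z_F15 into the caller's 160-byte ABI frame; the epilogue further
      //  down reloads them from the very same slots before returning.)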
      BLOCK_COMMENT("save registers, push frame {");
      __ z_stmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_std(Z_F8, 96, Z_SP);
      __ z_std(Z_F9, 104, Z_SP);
      __ z_std(Z_F10, 112, Z_SP);
      __ z_std(Z_F11, 120, Z_SP);
      __ z_std(Z_F12, 128, Z_SP);
      __ z_std(Z_F13, 136, Z_SP);
      __ z_std(Z_F14, 144, Z_SP);
      __ z_std(Z_F15, 152, Z_SP);

      //
      // Push ENTRY_FRAME including arguments:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Calculate new frame size and push frame.
      #define abi_plus_locals_size \
                (frame::z_top_ijava_frame_abi_size + frame::z_entry_frame_locals_size)
      if (abi_plus_locals_size % BytesPerWord == 0) {
        // Preload constant part of frame size.
        __ load_const_optimized(r_frame_size, -abi_plus_locals_size/BytesPerWord);
        // Keep copy of our frame pointer (caller's SP).
        __ z_lgr(r_entryframe_fp, Z_SP);
        // Add space required by arguments to frame size.
        __ z_slgf(r_frame_size, d_arg_argument_count, Z_R0, Z_SP);
        // Move Z_ARG5 early, it will be used as a local.
        __ z_lgr(r_new_arg_entry, r_arg_entry);
        // Convert frame size from words to bytes.
        __ z_sllg(r_frame_size, r_frame_size, LogBytesPerWord);
        __ push_frame(r_frame_size, r_entryframe_fp,
                      false/*don't copy SP*/, true/*frame size sign inverted*/);
      } else {
        guarantee(false, "frame sizes should be multiples of word size (BytesPerWord)");
      }
      BLOCK_COMMENT("} save, push");

      // Load argument registers for call.
      BLOCK_COMMENT("prepare/copy arguments {");
      __ z_lgr(Z_method, r_arg_method);
      __ z_lg(Z_thread, d_arg_thread, r_entryframe_fp);

      // Calculate top_of_arguments_addr which will be tos (not prepushed) later.
      // Simply use SP + frame::top_ijava_frame_size.
      __ add2reg(r_top_of_arguments_addr,
                 frame::z_top_ijava_frame_abi_size - BytesPerWord, Z_SP);

      // Initialize call_stub locals (step 1).
      if ((call_wrapper_address_offset + BytesPerWord == result_address_offset) &&
          (result_address_offset + BytesPerWord == result_type_offset) &&
          (result_type_offset + BytesPerWord == arguments_tos_address_offset)) {

        __ z_stmg(r_arg_call_wrapper_addr, r_top_of_arguments_addr,
                  call_wrapper_address_offset, r_entryframe_fp);
      } else {
        __ z_stg(r_arg_call_wrapper_addr,
                 call_wrapper_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_addr,
                 result_address_offset, r_entryframe_fp);
        __ z_stg(r_arg_result_type,
                 result_type_offset, r_entryframe_fp);
        __ z_stg(r_top_of_arguments_addr,
                 arguments_tos_address_offset, r_entryframe_fp);
      }

      // Copy Java arguments.

      // Any arguments to copy?
      __ load_and_test_int2long(Z_R1, Address(r_entryframe_fp, d_arg_argument_count));
      __ z_bre(arguments_copied);

      // Prepare loop and copy arguments in reverse order.
      {
        // Calculate argument size in bytes.
        __ z_sllg(r_argument_size_in_bytes, Z_R1, LogBytesPerWord);

        // Get addr of first incoming Java argument.
        __ z_lg(r_argument_addr, d_arg_argument_addr, r_entryframe_fp);

        // Let r_argumentcopy_addr point to last outgoing Java argument.
        __ add2reg(r_argumentcopy_addr, BytesPerWord, r_top_of_arguments_addr); // = Z_SP+160 effectively.

        // Let r_argument_addr point to last incoming Java argument.
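        // (Worked example: with n arguments of BytesPerWord == 8 bytes each,
        //  r_argument_size_in_bytes is n*8, so the add below yields
        //  r_argument_addr + (n-1)*8, i.e. the address of the last argument.)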
        __ add2reg_with_index(r_argument_addr, -BytesPerWord,
                              r_argument_size_in_bytes, r_argument_addr);

        // Now loop while Z_R1 > 0 and copy arguments.
        {
          Label next_argument;
          __ bind(next_argument);
          // Mem-mem move.
          __ z_mvc(0, BytesPerWord-1, r_argumentcopy_addr, 0, r_argument_addr);
          __ add2reg(r_argument_addr, -BytesPerWord);
          __ add2reg(r_argumentcopy_addr, BytesPerWord);
          __ z_brct(Z_R1, next_argument);
        }
      }  // End of argument copy loop.

      __ bind(arguments_copied);
    }
    BLOCK_COMMENT("} arguments");

    BLOCK_COMMENT("call {");
    {
      // Call frame manager or native entry.

      //
      // Register state on entry to frame manager / native entry:
      //
      //   Z_ARG1 = r_top_of_arguments_addr - intptr_t *sender tos (prepushed)
      //            Lesp = (SP) + copied_arguments_offset - 8
      //   Z_method                         - method
      //   Z_thread                         - JavaThread*
      //

      // Here, the usual SP is the initial_caller_sp.
      __ z_lgr(Z_R10, Z_SP);

      // Z_esp points to the slot below the last argument.
      __ z_lgr(Z_esp, r_top_of_arguments_addr);

      //
      // Stack on entry to frame manager / native entry:
      //
      //     F0      [TOP_IJAVA_FRAME_ABI]
      //             [outgoing Java arguments]
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //

      // Do a light-weight C-call here, r_new_arg_entry holds the address
      // of the interpreter entry point (frame manager or native entry)
      // and save runtime-value of return_pc in return_address
      // (call by reference argument).
      return_address = __ call_stub(r_new_arg_entry);
    }
    BLOCK_COMMENT("} call");

    {
      BLOCK_COMMENT("restore registers {");
      // Returned from frame manager or native entry.
      // Now pop frame, process result, and return to caller.

      //
      // Stack on exit from frame manager / native entry:
      //
      //     F0      [ABI]
      //             ...
      //             [ENTRY_FRAME_LOCALS]
      //     F1      [C_FRAME]
      //             ...
      //
      // Just pop the topmost frame ...
      //

      // Restore frame pointer.
      __ z_lg(r_entryframe_fp, _z_abi(callers_sp), Z_SP);
      // Pop frame. Done here to minimize stalls.
      __ pop_frame();

      // Reload some volatile registers which we've spilled before the call
      // to frame manager / native entry.
      // Access all locals via frame pointer, because we know nothing about
      // the topmost frame's size.
      __ z_lg(r_arg_result_addr, result_address_offset, r_entryframe_fp);
      __ z_lg(r_arg_result_type, result_type_offset, r_entryframe_fp);

      // Restore non-volatiles.
      __ z_lmg(Z_R6, Z_R14, 16, Z_SP);
      __ z_ld(Z_F8, 96, Z_SP);
      __ z_ld(Z_F9, 104, Z_SP);
      __ z_ld(Z_F10, 112, Z_SP);
      __ z_ld(Z_F11, 120, Z_SP);
      __ z_ld(Z_F12, 128, Z_SP);
      __ z_ld(Z_F13, 136, Z_SP);
      __ z_ld(Z_F14, 144, Z_SP);
      __ z_ld(Z_F15, 152, Z_SP);
      BLOCK_COMMENT("} restore");

      //
      // Stack on exit from call_stub:
      //
      //     0       [C_FRAME]
      //             ...
      //
      // No call_stub frames left.
      //

      // All non-volatiles have been restored at this point!!

      //------------------------------------------------------------------------
      // The following code makes some assumptions on the T_<type> enum values.
      // The enum is defined in globalDefinitions.hpp.
      // The validity of the assumptions is tested as far as possible.
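      // The dispatch below additionally relies on the types forming the contiguous
      // sequence T_BOOLEAN, T_CHAR, T_FLOAT, T_DOUBLE, T_BYTE, T_SHORT, T_INT,
      // T_LONG, T_OBJECT, T_ARRAY, T_VOID, T_ADDRESS, T_NARROWOOP; every step of
      // that assumption is re-checked by the guarantee() calls further down.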
      // The assigned values should not be shuffled:
      //   T_BOOLEAN   ==  4  - lowest used enum value
      //   T_NARROWOOP == 16  - largest used enum value
      //------------------------------------------------------------------------
      BLOCK_COMMENT("process result {");
      Label firstHandler;
      int   handlerLen = 8;
#ifdef ASSERT
      char  assertMsg[] = "check BasicType definition in globalDefinitions.hpp";
      __ z_chi(r_arg_result_type, T_BOOLEAN);
      __ asm_assert(Assembler::bcondNotLow, assertMsg, 0x0234);
      __ z_chi(r_arg_result_type, T_NARROWOOP);
      __ asm_assert(Assembler::bcondNotHigh, assertMsg, 0x0235);
#endif
      __ add2reg(r_arg_result_type, -T_BOOLEAN);          // Remove offset.
      __ z_larl(Z_R1, firstHandler);                      // location of first handler
      __ z_sllg(r_arg_result_type, r_arg_result_type, 3); // Each handler is 8 bytes long.
      __ z_bc(MacroAssembler::bcondAlways, 0, r_arg_result_type, Z_R1);

      __ align(handlerLen);
      __ bind(firstHandler);
      // T_BOOLEAN:
      guarantee(T_BOOLEAN == 4, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_CHAR:
      guarantee(T_CHAR == T_BOOLEAN+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_FLOAT:
      guarantee(T_FLOAT == T_CHAR+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_ste(Z_FRET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_DOUBLE:
      guarantee(T_DOUBLE == T_FLOAT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_std(Z_FRET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_BYTE:
      guarantee(T_BYTE == T_DOUBLE+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_SHORT:
      guarantee(T_SHORT == T_BYTE+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_INT:
      guarantee(T_INT == T_SHORT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_LONG:
      guarantee(T_LONG == T_INT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_OBJECT:
      guarantee(T_OBJECT == T_LONG+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_ARRAY:
      guarantee(T_ARRAY == T_OBJECT+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_VOID:
      guarantee(T_VOID == T_ARRAY+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_ADDRESS:
      guarantee(T_ADDRESS == T_VOID+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_stg(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      // T_NARROWOOP:
      guarantee(T_NARROWOOP == T_ADDRESS+1, "check BasicType definition in globalDefinitions.hpp");
      __ z_st(Z_RET, 0, r_arg_result_addr);
      __ z_br(Z_R14); // Return to caller.
      __ align(handlerLen);
      BLOCK_COMMENT("} process result");
    }
    return start;
  }

  // Return point for a Java call if there's an exception thrown in
  // Java code. The exception is caught and transformed into a
  // pending exception stored in JavaThread that can be tested from
  // within the VM.
  address generate_catch_exception() {
    StubGenStubId stub_id = StubGenStubId::catch_exception_id;
    StubCodeMark mark(this, stub_id);

    address start = __ pc();

    //
    // Registers alive
    //
    //   Z_thread
    //   Z_ARG1 - address of pending exception
    //   Z_ARG2 - return address in call stub
    //

    const Register exception_file = Z_R0;
    const Register exception_line = Z_R1;

    __ load_const_optimized(exception_file, (void*)__FILE__);
    __ load_const_optimized(exception_line, (void*)__LINE__);

    __ z_stg(Z_ARG1, thread_(pending_exception));
    // Store into `char *'.
    __ z_stg(exception_file, thread_(exception_file));
    // Store into `int'.
    __ z_st(exception_line, thread_(exception_line));

    // Complete return to VM.
    assert(StubRoutines::_call_stub_return_address != nullptr, "must have been generated before");

    // Continue in call stub.
    __ z_br(Z_ARG2);

    return start;
  }

  // Continuation point for runtime calls returning with a pending
  // exception. The pending exception check happened in the runtime
  // or native call stub. The pending exception in Thread is
  // converted into a Java-level exception.
  //
  // Read:
  //   Z_R14: pc the runtime library callee wants to return to.
  //          Since the exception occurred in the callee, the return pc
  //          from the point of view of Java is the exception pc.
  //
  // Invalidate:
  //   Volatile registers (except below).
  //
  // Update:
  //   Z_ARG1: exception
  //   (Z_R14 is unchanged and is live out).
  //
  address generate_forward_exception() {
    StubGenStubId stub_id = StubGenStubId::forward_exception_id;
    StubCodeMark mark(this, stub_id);
    address start = __ pc();

    #define pending_exception_offset in_bytes(Thread::pending_exception_offset())
#ifdef ASSERT
    // Get pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // Make sure that this code is only executed if there is a pending exception.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (1)");
      __ bind(L);
    }

    __ verify_oop(Z_ARG1, "StubRoutines::forward exception: not an oop");
#endif

    __ z_lgr(Z_ARG2, Z_R14); // Copy exception pc into Z_ARG2.
    __ save_return_pc();
    __ push_frame_abi160(0);
    // Find exception handler.
    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
                    Z_thread,
                    Z_ARG2);
    // Copy handler's address.
    __ z_lgr(Z_R1, Z_RET);
    __ pop_frame();
    __ restore_return_pc();

    // Set up the arguments for the exception handler:
    // - Z_ARG1: exception oop
    // - Z_ARG2: exception pc

    // Load pending exception oop.
    __ z_lg(Z_ARG1, pending_exception_offset, Z_thread);

    // The exception pc is the return address in the caller,
    // must load it into Z_ARG2.
    __ z_lgr(Z_ARG2, Z_R14);

#ifdef ASSERT
    // Make sure exception is set.
    {
      Label L;
      __ z_ltgr(Z_ARG1, Z_ARG1);
      __ z_brne(L);
      __ stop("StubRoutines::forward exception: no pending exception (2)");
      __ bind(L);
    }
#endif
    // Clear the pending exception.
    __ clear_mem(Address(Z_thread, pending_exception_offset), sizeof(void *));
    // Jump to exception handler.
    __ z_br(Z_R1 /*handler address*/);

    return start;

    #undef pending_exception_offset
  }

#undef __
#ifdef PRODUCT
#define __ _masm->
#else
#define __ (Verbose ? (_masm->block_comment(FILE_AND_LINE), _masm) : _masm)->
#endif

  // Support for uint StubRoutine::zarch::partial_subtype_check(Klass sub, Klass super);
  //
  // Arguments:
  //   ret  : Z_RET,  returned
  //   sub  : Z_ARG2, argument, not changed
  //   super: Z_ARG3, argument, not changed
  //
  //   raddr: Z_R14, blown by call
  //
  address generate_partial_subtype_check() {
    StubGenStubId stub_id = StubGenStubId::partial_subtype_check_id;
    StubCodeMark mark(this, stub_id);
    Label miss;

    address start = __ pc();

    const Register Rsubklass   = Z_ARG2; // subklass
    const Register Rsuperklass = Z_ARG3; // superklass

    // No args, but tmp registers that are killed.
    const Register Rlength    = Z_ARG4; // cache array length
    const Register Rarray_ptr = Z_ARG5; // Current value from cache array.

    if (UseCompressedOops) {
      assert(Universe::heap() != nullptr, "java heap must be initialized to generate partial_subtype_check stub");
    }

    // Always take the slow path.
    __ check_klass_subtype_slow_path(Rsubklass, Rsuperklass,
                                     Rarray_ptr, Rlength, nullptr, &miss);

    // Match falls through here.
    __ clear_reg(Z_RET);               // Zero indicates a match. Set EQ flag in CC.
    __ z_br(Z_R14);

    __ BIND(miss);
    __ load_const_optimized(Z_RET, 1); // One indicates a miss.
    __ z_ltgr(Z_RET, Z_RET);           // Set NE flag in CC.
    __ z_br(Z_R14);

    return start;
  }

  void generate_lookup_secondary_supers_table_stub() {
    StubGenStubId stub_id = StubGenStubId::lookup_secondary_supers_table_id;
    StubCodeMark mark(this, stub_id);

    const Register
        r_super_klass  = Z_ARG1,
        r_sub_klass    = Z_ARG2,
        r_array_index  = Z_ARG3,
        r_array_length = Z_ARG4,
        r_array_base   = Z_ARG5,
        r_bitmap       = Z_R10,
        r_result       = Z_R11;
    for (int slot = 0; slot < Klass::SECONDARY_SUPERS_TABLE_SIZE; slot++) {
      StubRoutines::_lookup_secondary_supers_table_stubs[slot] = __ pc();

      __ lookup_secondary_supers_table_const(r_sub_klass, r_super_klass,
                                             r_array_base, r_array_length, r_array_index,
                                             r_bitmap, r_result, slot);

      __ z_br(Z_R14);
    }
  }

  // Slow path implementation for UseSecondarySupersTable.
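  // Roughly speaking, the hashed table lookup above decides most queries from
  // the probed slot and the bitmap alone; only when that probe is inconclusive
  // does control continue in the slow path below, which searches the remaining
  // entries of the secondary supers array
  // (see MacroAssembler::lookup_secondary_supers_table_slow_path).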
  address generate_lookup_secondary_supers_table_slow_path_stub() {
    StubGenStubId stub_id = StubGenStubId::lookup_secondary_supers_table_slow_path_id;
    StubCodeMark mark(this, stub_id);

    address start = __ pc();

    const Register
        r_super_klass = Z_ARG1,
        r_array_base  = Z_ARG5,
        r_temp1       = Z_ARG4,
        r_array_index = Z_ARG3,
        r_bitmap      = Z_R10,
        r_result      = Z_R11;

    __ lookup_secondary_supers_table_slow_path(r_super_klass, r_array_base,
                                               r_array_index, r_bitmap, r_temp1, r_result, /* is_stub */ true);

    __ z_br(Z_R14);

    return start;
  }

#if !defined(PRODUCT)
  // Wrapper which calls oopDesc::is_oop_or_null()
  // Only called by MacroAssembler::verify_oop
  static void verify_oop_helper(const char* message, oopDesc* o) {
    if (!oopDesc::is_oop_or_null(o)) {
      fatal("%s. oop: " PTR_FORMAT, message, p2i(o));
    }
    ++ StubRoutines::_verify_oop_count;
  }
#endif

  // Return address of code to be called from code generated by
  // MacroAssembler::verify_oop.
  //
  // Don't generate, rather use C++ code.
  address generate_verify_oop_subroutine() {
    // Don't generate a StubCodeMark, because no code is generated!
    // Generating the mark triggers notifying the oprofile jvmti agent
    // about the dynamic code generation, but the stub without
    // code (code_size == 0) confuses opjitconv
    // StubCodeMark mark(this, "StubRoutines", "verify_oop_stub");

    address start = nullptr;

#if !defined(PRODUCT)
    start = CAST_FROM_FN_PTR(address, verify_oop_helper);
#endif

    return start;
  }

  // This is to test that the count register contains a positive int value.
  // Required because C2 does not respect int to long conversion for stub calls.
  void assert_positive_int(Register count) {
#ifdef ASSERT
    __ z_srag(Z_R0, count, 31);  // Just leave the sign (must be zero) in Z_R0.
    __ asm_assert(Assembler::bcondZero, "missing zero extend", 0xAFFE);
#endif
  }

  // Generate overlap test for array copy stubs.
  // If no actual overlap is detected, control is transferred to the
  // "normal" copy stub (entry address passed in disjoint_copy_target).
  // Otherwise, execution continues with the code generated by the
  // caller of array_overlap_test.
  //
  // Input:
  //   Z_ARG1  - from
  //   Z_ARG2  - to
  //   Z_ARG3  - element count
  void array_overlap_test(address disjoint_copy_target, int log2_elem_size) {
    __ MacroAssembler::compare_and_branch_optimized(Z_ARG2, Z_ARG1, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    Register index = Z_ARG3;
    if (log2_elem_size > 0) {
      __ z_sllg(Z_R1, Z_ARG3, log2_elem_size);  // byte count
      index = Z_R1;
    }
    __ add2reg_with_index(Z_R1, 0, index, Z_ARG1);  // First byte after "from" range.

    __ MacroAssembler::compare_and_branch_optimized(Z_R1, Z_ARG2, Assembler::bcondNotHigh,
                                                    disjoint_copy_target, /*len64=*/true, /*has_sign=*/false);

    // Destructive overlap: let caller generate code for that.
  }

  // Generate stub for disjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_disjoint_copy(bool aligned, int element_size,
                              bool branchToEnd,
                              bool restoreArgs) {
    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - No destructive overlap allowed (else unpredictable results).
    // - Destructive overlap does not exist if the leftmost byte of the target
    //   does not coincide with any of the source bytes (except the leftmost).
    //
    // Register usage upon entry:
    //   Z_ARG1 == Z_R2 : address of source array
    //   Z_ARG2 == Z_R3 : address of target array
    //   Z_ARG3 == Z_R4 : length of operands (# of elements on entry)
    //
    // Register usage within the generator:
    // - Z_R0 and Z_R1 are KILLed by the stub routine (target addr/len).
    //   Used as pair register operand in complex moves, scratch registers anyway.
    // - Z_R5 is KILLed by the stub routine (source register pair addr/len) (even/odd reg).
    //   Same as R0/R1, but no scratch register.
    // - Z_ARG1, Z_ARG2, Z_ARG3 are USEd but preserved by the stub routine,
    //   but they might get temporarily overwritten.

    Register save_reg = Z_ARG4;    // (= Z_R5), holds original target operand address for restore.

    {
      Register llen_reg  = Z_R1;   // Holds left operand len (odd reg).
      Register laddr_reg = Z_R0;   // Holds left operand addr (even reg), overlaps with data_reg.
      Register rlen_reg  = Z_R5;   // Holds right operand len (odd reg), overlaps with save_reg.
      Register raddr_reg = Z_R4;   // Holds right operand addr (even reg), overlaps with len_reg.

      Register data_reg  = Z_R0;   // Holds copied data chunk in alignment process and copy loop.
      Register len_reg   = Z_ARG3; // Holds operand len (#elements at entry, #bytes shortly after).
      Register dst_reg   = Z_ARG2; // Holds left (target)  operand addr.
      Register src_reg   = Z_ARG1; // Holds right (source) operand addr.

      Label     doMVCLOOP, doMVCLOOPcount, doMVCLOOPiterate;
      Label     doMVCUnrolled;
      NearLabel doMVC, doMVCgeneral, done;
      Label     MVC_template;
      address   pcMVCblock_b, pcMVCblock_e;

      bool usedMVCLE       = true;
      bool usedMVCLOOP     = true;
      bool usedMVCUnrolled = false;
      bool usedMVC         = false;
      bool usedMVCgeneral  = false;

      int stride;
      Register stride_reg;
      Register ix_reg;

      assert((element_size <= 256) && (256 % element_size == 0), "element size must be <= 256, power of 2");
      unsigned int log2_size = exact_log2(element_size);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY DISJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY DISJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY DISJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY DISJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY DISJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      BLOCK_COMMENT("preparation {");

      // No copying if len <= 0.
      if (branchToEnd) {
        __ compare64_and_branch(len_reg, (intptr_t) 0, Assembler::bcondNotHigh, done);
      } else {
        if (VM_Version::has_CompareBranch()) {
          __ z_cgib(len_reg, 0, Assembler::bcondNotHigh, 0, Z_R14);
        } else {
          __ z_ltgr(len_reg, len_reg);
          __ z_bcr(Assembler::bcondNotPositive, Z_R14);
        }
      }

      // Prefetch just one cache line. Speculative opt for short arrays.
      // Do not use Z_R1 in prefetch. Is undefined here.
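      // (PFD function code 1 requests a prefetch for fetch access, code 2 a
      //  prefetch for store access; both are pure performance hints with no
      //  effect on correctness.)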
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      BLOCK_COMMENT("} preparation");

      // Save args only if really needed.
      // Keep len test local to branch. Is generated only once.

      BLOCK_COMMENT("mode selection {");

      // Special handling for arrays with only a few elements.
      // Nothing fancy: just an executed MVC.
      if (log2_size > 0) {
        __ z_sllg(Z_R1, len_reg, log2_size); // Remember #bytes in Z_R1.
      }
      if (element_size != 8) {
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVC);
        usedMVC = true;
      }
      if (element_size == 8) { // Long and oop arrays are always aligned.
        __ z_cghi(len_reg, 256/element_size);
        __ z_brnh(doMVCUnrolled);
        usedMVCUnrolled = true;
      }

      // Prefetch another cache line. We, for sure, have more than one line to copy.
      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 256, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 256, Z_R0, dst_reg); // Store access.
      }

      if (restoreArgs) {
        // Remember entry value of ARG2 to restore all arguments later from that knowledge.
        __ z_lgr(save_reg, dst_reg);
      }

      __ z_cghi(len_reg, 4096/element_size);
      if (log2_size == 0) {
        __ z_lgr(Z_R1, len_reg); // Init Z_R1 with #bytes
      }
      __ z_brnh(doMVCLOOP);

      // Fall through to MVCLE case.

      BLOCK_COMMENT("} mode selection");

      // MVCLE: for long arrays
      //   DW aligned: Best performance for sizes > 4kBytes.
      //   unaligned:  Least complex for sizes > 256 bytes.
      if (usedMVCLE) {
        BLOCK_COMMENT("mode MVCLE {");

        // Setup registers for mvcle.
        //__ z_lgr(llen_reg, len_reg);  // r1 <- r4  #bytes already in Z_R1, aka llen_reg.
        __ z_lgr(laddr_reg, dst_reg);   // r0 <- r3
        __ z_lgr(raddr_reg, src_reg);   // r4 <- r2
        __ z_lgr(rlen_reg, llen_reg);   // r5 <- r1

        __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb0);    // special: bypass cache
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0xb8); // special: Hold data in cache.
        // __ MacroAssembler::move_long_ext(laddr_reg, raddr_reg, 0);

        if (restoreArgs) {
          // MVCLE updates the source (Z_R4,Z_R5) and target (Z_R0,Z_R1) register pairs.
          // Dst_reg (Z_ARG2) and src_reg (Z_ARG1) are left untouched. No restore required.
          // Len_reg (Z_ARG3) is destroyed and must be restored.
          __ z_slgr(laddr_reg, dst_reg);             // copied #bytes
          if (log2_size > 0) {
            __ z_srag(Z_ARG3, laddr_reg, log2_size); // Convert back to #elements.
          } else {
            __ z_lgr(Z_ARG3, laddr_reg);
          }
        }
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVCLE");
      }
      // No fallthru possible here.

      // MVCUnrolled: for short, aligned arrays.

      if (usedMVCUnrolled) {
        BLOCK_COMMENT("mode MVC unrolled {");
        stride = 8;

        // Generate unrolled MVC instructions.
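        // (The loop below emits one MVC+branch block per possible doubleword count,
        //  from 32 DWs down to 2, followed by the single-DW block. The entry code
        //  further down computes MVC_ListEnd - (#DW * MVCblocksize) and branches
        //  into this list, so exactly one of the blocks gets executed.)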
        for (int ii = 32; ii > 1; ii--) {
          __ z_mvc(0, ii * stride-1, dst_reg, 0, src_reg); // ii*8 byte copy
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        }

        pcMVCblock_b = __ pc();
        __ z_mvc(0, 1 * stride-1, dst_reg, 0, src_reg); // 8 byte copy
        if (branchToEnd) {
          __ z_bru(done);
        } else {
          __ z_br(Z_R14);
        }

        pcMVCblock_e = __ pc();
        Label MVC_ListEnd;
        __ bind(MVC_ListEnd);

        // This is an absolute fast path:
        // - Array len in bytes must not be greater than 256.
        // - Array len in bytes must be an integer multiple of DW
        //   to save expensive handling of trailing bytes.
        // - Argument restore is not done,
        //   i.e. previous code must not alter arguments (this code doesn't either).

        __ bind(doMVCUnrolled);

        // Avoid mul, prefer shift where possible.
        // Combine shift right (for #DW) with shift left (for block size).
        // Set CC for zero test below (asm_assert).
        // Note: #bytes comes in Z_R1, #DW in len_reg.
        unsigned int MVCblocksize    = pcMVCblock_e - pcMVCblock_b;
        unsigned int logMVCblocksize = 0xffffffffU; // Pacify compiler ("used uninitialized" warning).

        if (log2_size > 0) { // Len was scaled into Z_R1.
          switch (MVCblocksize) {

            case  8: logMVCblocksize = 3;
                     __ z_ltgr(Z_R0, Z_R1); // #bytes is index
                     break;                 // reasonable size, use shift

            case 16: logMVCblocksize = 4;
                     __ z_slag(Z_R0, Z_R1, logMVCblocksize-log2_size);
                     break;                 // reasonable size, use shift

            default: logMVCblocksize = 0;
                     __ z_ltgr(Z_R0, len_reg); // #DW for mul
                     break;                    // all other sizes: use mul
          }
        } else {
          guarantee(log2_size, "doMVCUnrolled: only for DW entities");
        }

        // This test (and branch) is redundant. Previous code makes sure that
        //  - element count > 0
        //  - element size == 8.
        // Thus, len reg should never be zero here. We insert an asm_assert() here,
        // just to double-check and to be on the safe side.
        __ asm_assert(false, "zero len cannot occur", 99);

        __ z_larl(Z_R1, MVC_ListEnd); // Get addr of last instr block.
        // Avoid mul, prefer shift where possible.
        if (logMVCblocksize == 0) {
          __ z_mghi(Z_R0, MVCblocksize);
        }
        __ z_slgr(Z_R1, Z_R0);
        __ z_br(Z_R1);
        BLOCK_COMMENT("} mode MVC unrolled");
      }
      // No fallthru possible here.

      // MVC execute template
      // Must always generate. Usage may be switched on below.
      // There is no suitable place after here to put the template.
      __ bind(MVC_template);
      __ z_mvc(0, 0, dst_reg, 0, src_reg); // Instr template, never exec directly!


      // MVC Loop: for medium-sized arrays

      // Only for DW aligned arrays (src and dst).
      // #bytes to copy must be at least 256!!!
      // Non-aligned cases handled separately.
      stride     = 256;
      stride_reg = Z_R1;   // Holds #bytes when control arrives here.
      ix_reg     = Z_ARG3; // Alias for len_reg.


      if (usedMVCLOOP) {
        BLOCK_COMMENT("mode MVC loop {");
        __ bind(doMVCLOOP);

        __ z_lcgr(ix_reg, Z_R1);      // Ix runs from -(n-2)*stride to 1*stride (inclusive).
        __ z_llill(stride_reg, stride);
        __ add2reg(ix_reg, 2*stride); // Thus: increment ix by 2*stride.
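        // (Worked example for 1024 bytes to copy: ix starts at -1024 + 512 = -512.
        //  Each iteration moves 256 bytes; BRXLG adds 256 to ix and loops while
        //  ix <= 256, so four MVCs are executed. The aghi below then subtracts the
        //  512 again, leaving ix == 0, i.e. "everything copied".)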

        __ bind(doMVCLOOPiterate);
        __ z_mvc(0, stride-1, dst_reg, 0, src_reg);
        __ add2reg(dst_reg, stride);
        __ add2reg(src_reg, stride);
        __ bind(doMVCLOOPcount);
        __ z_brxlg(ix_reg, stride_reg, doMVCLOOPiterate);

        // Don't use add2reg() here, since we must set the condition code!
        __ z_aghi(ix_reg, -2*stride); // Compensate incr from above: zero diff means "all copied".

        if (restoreArgs) {
          __ z_lcgr(Z_R1, ix_reg);    // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          __ z_brnz(doMVCgeneral);    // We're not done yet, ix_reg is not zero.

          // ARG1, ARG2, and ARG3 were altered by the code above, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg); // copied #bytes
          __ z_slgr(src_reg, dst_reg);  // = ARG1 (now restored)
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3.
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);   // ARG2 now restored.

          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }

        } else {
          if (branchToEnd) {
            __ z_brz(done);                        // CC set by aghi instr.
          } else {
            __ z_bcr(Assembler::bcondZero, Z_R14); // We're all done if zero.
          }

          __ z_lcgr(Z_R1, ix_reg);  // Prepare ix_reg for copy loop, #bytes expected in Z_R1.
          // __ z_bru(doMVCgeneral); // fallthru
        }
        usedMVCgeneral = true;
        BLOCK_COMMENT("} mode MVC loop");
      }
      // Fallthru to doMVCgeneral

      // MVCgeneral: for short, unaligned arrays, after other copy operations

      // Somewhat expensive due to use of EX instruction, but simple.
      if (usedMVCgeneral) {
        BLOCK_COMMENT("mode MVC general {");
        __ bind(doMVCgeneral);

        __ add2reg(len_reg, -1, Z_R1);      // Get #bytes-1 for EXECUTE.
        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(len_reg, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_larl(Z_R1, MVC_template);    // Get addr of instr template.
          __ z_ex(len_reg, 0, Z_R0, Z_R1);  // Execute MVC with variable length.
        }                                   // penalty: 9 ticks

        if (restoreArgs) {
          // ARG1, ARG2, and ARG3 were altered by code executed before, so restore them building on save_reg.
          __ z_slgr(dst_reg, save_reg); // Copied #bytes without the "doMVCgeneral" chunk
          __ z_slgr(src_reg, dst_reg);  // = ARG1 (now restored), was not advanced for "doMVCgeneral" chunk
          __ add2reg_with_index(dst_reg, 1, len_reg, dst_reg); // Len of executed MVC was not accounted for, yet.
          if (log2_size) {
            __ z_srag(Z_ARG3, dst_reg, log2_size); // Convert back to #elements to restore ARG3
          } else {
            __ z_lgr(Z_ARG3, dst_reg);
          }
          __ z_lgr(Z_ARG2, save_reg);   // ARG2 now restored.
        }

        if (usedMVC) {
          if (branchToEnd) {
            __ z_bru(done);
          } else {
            __ z_br(Z_R14);
          }
        } else {
          if (!branchToEnd) __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC general");
      }
      // Fallthru possible if following block not generated.

      // MVC: for short, unaligned arrays

      // Somewhat expensive due to use of EX instruction, but simple. penalty: 9 ticks.
      // Differs from doMVCgeneral in reconstruction of ARG2, ARG3, and ARG4.
      if (usedMVC) {
        BLOCK_COMMENT("mode MVC {");
        __ bind(doMVC);

        // get #bytes-1 for EXECUTE
        if (log2_size) {
          __ add2reg(Z_R1, -1);          // Length was scaled into Z_R1.
        } else {
          __ add2reg(Z_R1, -1, len_reg); // Length was not scaled.
        }

        if (VM_Version::has_ExecuteExtensions()) {
          __ z_exrl(Z_R1, MVC_template); // Execute MVC with variable length.
        } else {
          __ z_lgr(Z_R0, Z_R5);          // Save ARG4, may be unnecessary.
          __ z_larl(Z_R5, MVC_template); // Get addr of instr template.
          __ z_ex(Z_R1, 0, Z_R0, Z_R5);  // Execute MVC with variable length.
          __ z_lgr(Z_R5, Z_R0);          // Restore ARG4, may be unnecessary.
        }

        if (!branchToEnd) {
          __ z_br(Z_R14);
        }
        BLOCK_COMMENT("} mode MVC");
      }

      __ bind(done);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY DISJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY DISJOINT      "); break;
      }
    }
  }

  // Generate stub for conjoint array copy. If "aligned" is true, the
  // "from" and "to" addresses are assumed to be heapword aligned.
  //
  // Arguments for generated stub:
  //   from:  Z_ARG1
  //   to:    Z_ARG2
  //   count: Z_ARG3 treated as signed
  void generate_conjoint_copy(bool aligned, int element_size, bool branchToEnd) {

    // This is the zarch specific stub generator for general array copy tasks.
    // It has the following prereqs and features:
    //
    // - Destructive overlap exists and is handled by reverse copy.
    // - Destructive overlap exists if the leftmost byte of the target
    //   does coincide with any of the source bytes (except the leftmost).
    // - Z_R0 and Z_R1 are KILLed by the stub routine (data and stride)
    // - Z_ARG1 and Z_ARG2 are USEd but preserved by the stub routine.
    // - Z_ARG3 is USEd but preserved by the stub routine.
    // - Z_ARG4 is used as index register and is thus KILLed.
    //
    {
      Register stride_reg = Z_R1;   // Stride & compare value in loop (negative element_size).
      Register data_reg   = Z_R0;   // Holds value of currently processed element.
      Register ix_reg     = Z_ARG4; // Holds byte index of currently processed element.
      Register len_reg    = Z_ARG3; // Holds length (in #elements) of arrays.
      Register dst_reg    = Z_ARG2; // Holds left  operand addr.
      Register src_reg    = Z_ARG1; // Holds right operand addr.

      assert(256 % element_size == 0, "Element size must be power of 2.");
      assert(element_size <= 8, "Can't handle more than DW units.");

      switch (element_size) {
        case 1:  BLOCK_COMMENT("ARRAYCOPY CONJOINT byte  {"); break;
        case 2:  BLOCK_COMMENT("ARRAYCOPY CONJOINT short {"); break;
        case 4:  BLOCK_COMMENT("ARRAYCOPY CONJOINT int   {"); break;
        case 8:  BLOCK_COMMENT("ARRAYCOPY CONJOINT long  {"); break;
        default: BLOCK_COMMENT("ARRAYCOPY CONJOINT       {"); break;
      }

      assert_positive_int(len_reg);

      if (VM_Version::has_Prefetch()) {
        __ z_pfd(0x01, 0, Z_R0, src_reg); // Fetch access.
        __ z_pfd(0x02, 0, Z_R0, dst_reg); // Store access.
      }

      unsigned int log2_size = exact_log2(element_size);
      if (log2_size) {
        __ z_sllg(ix_reg, len_reg, log2_size);
      } else {
        __ z_lgr(ix_reg, len_reg);
      }

      // Optimize reverse copy loop.
      // Main loop copies DW units which may be unaligned. Unaligned access adds some penalty ticks.
      // Unaligned DW access (neither fetch nor store) is DW-atomic, but should be alignment-atomic.
      // Preceding the main loop, some bytes are copied to obtain a DW-multiple remaining length.

      Label countLoop1;
      Label copyLoop1;
      Label skipBY;
      Label skipHW;
      int   stride = -8;

      __ load_const_optimized(stride_reg, stride); // Prepare for DW copy loop.

      if (element_size == 8)    // Nothing to do here.
        __ z_bru(countLoop1);
      else {                    // Do not generate dead code.
        __ z_tmll(ix_reg, 7);   // Check the "odd" bits.
        __ z_bre(countLoop1);   // There are none, very good!
      }

      if (log2_size == 0) {     // Handle leftover Byte.
        __ z_tmll(ix_reg, 1);
        __ z_bre(skipBY);
        __ z_lb(data_reg,   -1, ix_reg, src_reg);
        __ z_stcy(data_reg, -1, ix_reg, dst_reg);
        __ add2reg(ix_reg, -1); // Decrement delayed to avoid AGI.
        __ bind(skipBY);
        // fallthru
      }
      if (log2_size <= 1) {     // Handle leftover HW.
        __ z_tmll(ix_reg, 2);
        __ z_bre(skipHW);
        __ z_lhy(data_reg,  -2, ix_reg, src_reg);
        __ z_sthy(data_reg, -2, ix_reg, dst_reg);
        __ add2reg(ix_reg, -2); // Decrement delayed to avoid AGI.
        __ bind(skipHW);
        __ z_tmll(ix_reg, 4);
        __ z_bre(countLoop1);
        // fallthru
      }
      if (log2_size <= 2) {     // There are just 4 bytes (left) that need to be copied.
        __ z_ly(data_reg,  -4, ix_reg, src_reg);
        __ z_sty(data_reg, -4, ix_reg, dst_reg);
        __ add2reg(ix_reg, -4); // Decrement delayed to avoid AGI.
        __ z_bru(countLoop1);
      }

      // Control can never get to here. Never! Never ever!
      __ z_illtrap(0x99);

      __ bind(copyLoop1);
      __ z_lg(data_reg,  0, ix_reg, src_reg);
      __ z_stg(data_reg, 0, ix_reg, dst_reg);
      __ bind(countLoop1);
      __ z_brxhg(ix_reg, stride_reg, copyLoop1);

      if (!branchToEnd)
        __ z_br(Z_R14);

      switch (element_size) {
        case 1:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT byte "); break;
        case 2:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT short"); break;
        case 4:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT int  "); break;
        case 8:  BLOCK_COMMENT("} ARRAYCOPY CONJOINT long "); break;
        default: BLOCK_COMMENT("} ARRAYCOPY CONJOINT      "); break;
      }
    }
  }

  address generate_disjoint_nonoop_copy(StubGenStubId stub_id) {
    bool aligned;
    int element_size;
    switch (stub_id) {
    case jbyte_disjoint_arraycopy_id:
      aligned = false;
      element_size = 1;
      break;
    case arrayof_jbyte_disjoint_arraycopy_id:
      aligned = true;
      element_size = 1;
      break;
    case jshort_disjoint_arraycopy_id:
      aligned = false;
      element_size = 2;
      break;
    case arrayof_jshort_disjoint_arraycopy_id:
      aligned = true;
      element_size = 2;
      break;
    case jint_disjoint_arraycopy_id:
      aligned = false;
      element_size = 4;
      break;
    case arrayof_jint_disjoint_arraycopy_id:
      aligned = true;
      element_size = 4;
      break;
    case jlong_disjoint_arraycopy_id:
      aligned = false;
      element_size = 8;
      break;
    case arrayof_jlong_disjoint_arraycopy_id:
      aligned = true;
      element_size = 8;
      break;
    default:
      ShouldNotReachHere();
    }
    StubCodeMark mark(this, stub_id);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
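    // branchToEnd == false: the primitive stubs return directly via Z_R14 on every
    // exit path, no epilogue follows. restoreArgs == false: unlike the oop variant,
    // no arraycopy_epilogue needs ARG2/ARG3 afterwards, so the argument registers
    // need not be reconstructed.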
    generate_disjoint_copy(aligned, element_size, false, false);
    return __ addr_at(start_off);
  }

  address generate_disjoint_oop_copy(StubGenStubId stub_id) {
    bool aligned;
    bool dest_uninitialized;
    switch (stub_id) {
    case oop_disjoint_arraycopy_id:
      aligned = false;
      dest_uninitialized = false;
      break;
    case arrayof_oop_disjoint_arraycopy_id:
      aligned = true;
      dest_uninitialized = false;
      break;
    case oop_disjoint_arraycopy_uninit_id:
      aligned = false;
      dest_uninitialized = true;
      break;
    case arrayof_oop_disjoint_arraycopy_uninit_id:
      aligned = true;
      dest_uninitialized = true;
      break;
    default:
      ShouldNotReachHere();
    }
    StubCodeMark mark(this, stub_id);
    // This is the zarch specific stub generator for oop array copy.
    // Refer to generate_disjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    unsigned int size      = UseCompressedOops ? 4 : 8;

    DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3);

    generate_disjoint_copy(aligned, size, true, true);

    bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, Z_ARG2, Z_ARG3, true);

    return __ addr_at(start_off);
  }

  address generate_conjoint_nonoop_copy(StubGenStubId stub_id) {
    bool aligned;
    int shift; // i.e. log2(element size)
    address nooverlap_target;
    switch (stub_id) {
    case jbyte_arraycopy_id:
      aligned = false;
      shift = 0;
      nooverlap_target = StubRoutines::jbyte_disjoint_arraycopy();
      break;
    case arrayof_jbyte_arraycopy_id:
      aligned = true;
      shift = 0;
      nooverlap_target = StubRoutines::arrayof_jbyte_disjoint_arraycopy();
      break;
    case jshort_arraycopy_id:
      aligned = false;
      shift = 1;
      nooverlap_target = StubRoutines::jshort_disjoint_arraycopy();
      break;
    case arrayof_jshort_arraycopy_id:
      aligned = true;
      shift = 1;
      nooverlap_target = StubRoutines::arrayof_jshort_disjoint_arraycopy();
      break;
    case jint_arraycopy_id:
      aligned = false;
      shift = 2;
      nooverlap_target = StubRoutines::jint_disjoint_arraycopy();
      break;
    case arrayof_jint_arraycopy_id:
      aligned = true;
      shift = 2;
      nooverlap_target = StubRoutines::arrayof_jint_disjoint_arraycopy();
      break;
    case jlong_arraycopy_id:
      aligned = false;
      shift = 3;
      nooverlap_target = StubRoutines::jlong_disjoint_arraycopy();
      break;
    case arrayof_jlong_arraycopy_id:
      aligned = true;
      shift = 3;
      nooverlap_target = StubRoutines::arrayof_jlong_disjoint_arraycopy();
      break;
    default:
      ShouldNotReachHere();
    }
    StubCodeMark mark(this, stub_id);
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.
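    // Only the truly overlapping case reaches the code generated below;
    // array_overlap_test has already redirected the disjoint case to nooverlap_target.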
    generate_conjoint_copy(aligned, 1 << shift, false);
    return __ addr_at(start_off);
  }

  address generate_conjoint_oop_copy(StubGenStubId stub_id) {
    bool aligned;
    bool dest_uninitialized;
    address nooverlap_target;
    switch (stub_id) {
    case oop_arraycopy_id:
      aligned = false;
      dest_uninitialized = false;
      nooverlap_target = StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
      break;
    case arrayof_oop_arraycopy_id:
      aligned = true;
      dest_uninitialized = false;
      nooverlap_target = StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized);
      break;
    case oop_arraycopy_uninit_id:
      aligned = false;
      dest_uninitialized = true;
      nooverlap_target = StubRoutines::oop_disjoint_arraycopy(dest_uninitialized);
      break;
    case arrayof_oop_arraycopy_uninit_id:
      aligned = true;
      dest_uninitialized = true;
      nooverlap_target = StubRoutines::arrayof_oop_disjoint_arraycopy(dest_uninitialized);
      break;
    default:
      ShouldNotReachHere();
    }
    StubCodeMark mark(this, stub_id);
    // This is the zarch specific stub generator for overlapping oop array copy.
    // Refer to generate_conjoint_copy for a list of prereqs and features.
    unsigned int start_off = __ offset();  // Remember stub start address (is rtn value).
    unsigned int size      = UseCompressedOops ? 4 : 8;
    unsigned int shift     = UseCompressedOops ? 2 : 3;

    // Branch to disjoint_copy (if applicable) before pre_barrier to avoid double pre_barrier.
    array_overlap_test(nooverlap_target, shift);  // Branch away to nooverlap_target if disjoint.

    DecoratorSet decorators = IN_HEAP | IS_ARRAY;
    if (dest_uninitialized) {
      decorators |= IS_DEST_UNINITIALIZED;
    }
    if (aligned) {
      decorators |= ARRAYCOPY_ALIGNED;
    }

    BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
    bs->arraycopy_prologue(_masm, decorators, T_OBJECT, Z_ARG1, Z_ARG2, Z_ARG3);

    generate_conjoint_copy(aligned, size, true);  // Must preserve ARG2, ARG3.

    bs->arraycopy_epilogue(_masm, decorators, T_OBJECT, Z_ARG2, Z_ARG3, true);

    return __ addr_at(start_off);
  }

  //
  // Generate 'unsafe' set memory stub
  // Though just as safe as the other stubs, it takes an unscaled
  // size_t (# bytes) argument instead of an element count.
  //
  // Input:
  //   Z_ARG1  - destination array address
  //   Z_ARG2  - byte count (size_t)
  //   Z_ARG3  - byte value
  //
  address generate_unsafe_setmemory(address unsafe_byte_fill) {
    __ align(CodeEntryAlignment);
    StubCodeMark mark(this, StubGenStubId::unsafe_setmemory_id);
    unsigned int start_off = __ offset();

    // bump this on entry, not on exit:
    // inc_counter_np(SharedRuntime::_unsafe_set_memory_ctr);

    const Register dest    = Z_ARG1;
    const Register size    = Z_ARG2;
    const Register byteVal = Z_ARG3;
    NearLabel tail, finished;
    // fill_to_memory_atomic(unsigned char*, unsigned long, unsigned char)

    // Mark remaining code as such which performs Unsafe accesses.
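    // The mark records the code range of this stub with UnsafeMemoryAccess; if one
    // of the stores below faults, execution can be resumed at the common error exit
    // registered in generate_arraycopy_stubs() instead of crashing the VM.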
    UnsafeMemoryAccessMark umam(this, true, false);

    __ z_vlvgb(Z_V0, byteVal, 0);
    __ z_vrepb(Z_V0, Z_V0, 0);

    __ z_aghi(size, -32);
    __ z_brl(tail);

    {
      NearLabel again;
      __ bind(again);
      __ z_vst(Z_V0, Address(dest, 0));
      __ z_vst(Z_V0, Address(dest, 16));
      __ z_aghi(dest, 32);
      __ z_aghi(size, -32);
      __ z_brnl(again);
    }

    __ bind(tail);

    {
      NearLabel dont;
      __ testbit(size, 4);
      __ z_brz(dont);
      __ z_vst(Z_V0, Address(dest, 0));
      __ z_aghi(dest, 16);
      __ bind(dont);
    }

    {
      NearLabel dont;
      __ testbit(size, 3);
      __ z_brz(dont);
      __ z_vsteg(Z_V0, 0, Z_R0, dest, 0);
      __ z_aghi(dest, 8);
      __ bind(dont);
    }

    __ z_tmll(size, 7);
    __ z_brc(Assembler::bcondAllZero, finished);

    {
      NearLabel dont;
      __ testbit(size, 2);
      __ z_brz(dont);
      __ z_vstef(Z_V0, 0, Z_R0, dest, 0);
      __ z_aghi(dest, 4);
      __ bind(dont);
    }

    {
      NearLabel dont;
      __ testbit(size, 1);
      __ z_brz(dont);
      __ z_vsteh(Z_V0, 0, Z_R0, dest, 0);
      __ z_aghi(dest, 2);
      __ bind(dont);
    }

    {
      NearLabel dont;
      __ testbit(size, 0);
      __ z_brz(dont);
      __ z_vsteb(Z_V0, 0, Z_R0, dest, 0);
      __ bind(dont);
    }

    __ bind(finished);
    __ z_br(Z_R14);

    return __ addr_at(start_off);
  }

  // This is the common error exit stub for UnsafeMemoryAccess.
  address generate_unsafecopy_common_error_exit() {
    unsigned int start_off = __ offset();
    __ z_lghi(Z_RET, 0); // return 0
    __ z_br(Z_R14);
    return __ addr_at(start_off);
  }

  void generate_arraycopy_stubs() {

    // Note: the disjoint stubs must be generated first, some of
    // the conjoint stubs use them.
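    // (The conjoint stubs embed the address of their disjoint counterpart as the
    //  array_overlap_test branch target, so those addresses must already exist
    //  when the conjoint stubs are generated.)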

    address ucm_common_error_exit = generate_unsafecopy_common_error_exit();
    UnsafeMemoryAccess::set_common_exit_stub_pc(ucm_common_error_exit);

    StubRoutines::_jbyte_disjoint_arraycopy      = generate_disjoint_nonoop_copy(StubGenStubId::jbyte_disjoint_arraycopy_id);
    StubRoutines::_jshort_disjoint_arraycopy     = generate_disjoint_nonoop_copy(StubGenStubId::jshort_disjoint_arraycopy_id);
    StubRoutines::_jint_disjoint_arraycopy       = generate_disjoint_nonoop_copy(StubGenStubId::jint_disjoint_arraycopy_id);
    StubRoutines::_jlong_disjoint_arraycopy      = generate_disjoint_nonoop_copy(StubGenStubId::jlong_disjoint_arraycopy_id);
    StubRoutines::_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(StubGenStubId::oop_disjoint_arraycopy_id);
    StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(StubGenStubId::oop_disjoint_arraycopy_uninit_id);

    StubRoutines::_arrayof_jbyte_disjoint_arraycopy      = generate_disjoint_nonoop_copy(StubGenStubId::arrayof_jbyte_disjoint_arraycopy_id);
    StubRoutines::_arrayof_jshort_disjoint_arraycopy     = generate_disjoint_nonoop_copy(StubGenStubId::arrayof_jshort_disjoint_arraycopy_id);
    StubRoutines::_arrayof_jint_disjoint_arraycopy       = generate_disjoint_nonoop_copy(StubGenStubId::arrayof_jint_disjoint_arraycopy_id);
    StubRoutines::_arrayof_jlong_disjoint_arraycopy      = generate_disjoint_nonoop_copy(StubGenStubId::arrayof_jlong_disjoint_arraycopy_id);
    StubRoutines::_arrayof_oop_disjoint_arraycopy        = generate_disjoint_oop_copy(StubGenStubId::arrayof_oop_disjoint_arraycopy_id);
    StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = generate_disjoint_oop_copy(StubGenStubId::arrayof_oop_disjoint_arraycopy_uninit_id);

    StubRoutines::_jbyte_arraycopy      = generate_conjoint_nonoop_copy(StubGenStubId::jbyte_arraycopy_id);
    StubRoutines::_jshort_arraycopy     = generate_conjoint_nonoop_copy(StubGenStubId::jshort_arraycopy_id);
    StubRoutines::_jint_arraycopy       = generate_conjoint_nonoop_copy(StubGenStubId::jint_arraycopy_id);
    StubRoutines::_jlong_arraycopy      = generate_conjoint_nonoop_copy(StubGenStubId::jlong_arraycopy_id);
    StubRoutines::_oop_arraycopy        = generate_conjoint_oop_copy(StubGenStubId::oop_arraycopy_id);
    StubRoutines::_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::oop_arraycopy_uninit_id);

    StubRoutines::_arrayof_jbyte_arraycopy      = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jbyte_arraycopy_id);
    StubRoutines::_arrayof_jshort_arraycopy     = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jshort_arraycopy_id);
    StubRoutines::_arrayof_jint_arraycopy       = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jint_arraycopy_id);
    StubRoutines::_arrayof_jlong_arraycopy      = generate_conjoint_nonoop_copy(StubGenStubId::arrayof_jlong_arraycopy_id);
    StubRoutines::_arrayof_oop_arraycopy        = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_id);
    StubRoutines::_arrayof_oop_arraycopy_uninit = generate_conjoint_oop_copy(StubGenStubId::arrayof_oop_arraycopy_uninit_id);

#ifdef COMPILER2
    StubRoutines::_unsafe_setmemory =
      VM_Version::has_VectorFacility() ? generate_unsafe_setmemory(StubRoutines::_jbyte_fill) : nullptr;
#endif // COMPILER2
  }

  // Call interface for AES_encryptBlock, AES_decryptBlock stubs.
  //
  //   Z_ARG1 - source data block. Ptr to leftmost byte to be processed.
  //   Z_ARG2 - destination data block. Ptr to leftmost byte to be stored.
1624 // For in-place encryption/decryption, ARG1 and ARG2 can point 1625 // to the same piece of storage. 1626 // Z_ARG3 - Crypto key address (expanded key). The first n bits of 1627 // the expanded key constitute the original AES-<n> key (see below). 1628 // 1629 // Z_RET - return value. First unprocessed byte offset in src buffer. 1630 // 1631 // Some remarks: 1632 // The crypto key, as passed from the caller to these encryption stubs, 1633 // is a so-called expanded key. It is derived from the original key 1634 // by the Rijndael key schedule, see http://en.wikipedia.org/wiki/Rijndael_key_schedule 1635 // With the expanded key, the cipher/decipher task is decomposed in 1636 // multiple, less complex steps, called rounds. Sun SPARC and Intel 1637 // processors obviously implement support for those less complex steps. 1638 // z/Architecture provides instructions for full cipher/decipher complexity. 1639 // Therefore, we need the original, not the expanded key here. 1640 // Luckily, the first n bits of an AES-<n> expanded key are formed 1641 // by the original key itself. That takes us out of trouble. :-) 1642 // The key length (in bytes) relation is as follows: 1643 // original expanded rounds key bit keylen 1644 // key bytes key bytes length in words 1645 // 16 176 11 128 44 1646 // 24 208 13 192 52 1647 // 32 240 15 256 60 1648 // 1649 // The crypto instructions used in the AES* stubs have some specific register requirements. 1650 // Z_R0 holds the crypto function code. Please refer to the KM/KMC instruction 1651 // description in the "z/Architecture Principles of Operation" manual for details. 1652 // Z_R1 holds the parameter block address. The parameter block contains the cryptographic key 1653 // (KM instruction) and the chaining value (KMC instruction). 1654 // dst must designate an even-numbered register, holding the address of the output message. 1655 // src must designate an even/odd register pair, holding the address/length of the original message 1656 1657 // Helper function which generates code to 1658 // - load the function code in register fCode (== Z_R0). 1659 // - load the data block length (depends on cipher function) into register srclen if requested. 1660 // - is_decipher switches between cipher/decipher function codes 1661 // - set_len requests (if true) loading the data block length in register srclen 1662 void generate_load_AES_fCode(Register keylen, Register fCode, Register srclen, bool is_decipher) { 1663 1664 BLOCK_COMMENT("Set fCode {"); { 1665 Label fCode_set; 1666 int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; 1667 bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) 1668 && (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk); 1669 // Expanded key length is 44/52/60 * 4 bytes for AES-128/AES-192/AES-256. 1670 __ z_cghi(keylen, 52); // Check only once at the beginning. keylen and fCode may share the same register. 
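  // The expanded key is passed as an int[] (see the table above), so keylen is 44, 52, or 60
  // four-byte words. The single compare against the middle value (52) is all that is needed
  // to select among the three variants handled below. Equivalent selection logic as an
  // illustrative C sketch (not generated code; aes_fCode is a hypothetical helper):
  //
  //   int aes_fCode(int keylen_in_words, int mode) {
  //     if (keylen_in_words <  52) return VM_Version::Cipher::_AES128 + mode;  // 44 words, AES-128
  //     if (keylen_in_words == 52) return VM_Version::Cipher::_AES192 + mode;  // 52 words, AES-192
  //     return VM_Version::Cipher::_AES256 + mode;                             // 60 words, AES-256
  //   }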
1671 1672 __ z_lghi(fCode, VM_Version::Cipher::_AES128 + mode); 1673 if (!identical_dataBlk_len) { 1674 __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk); 1675 } 1676 __ z_brl(fCode_set); // keyLen < 52: AES128 1677 1678 __ z_lghi(fCode, VM_Version::Cipher::_AES192 + mode); 1679 if (!identical_dataBlk_len) { 1680 __ z_lghi(srclen, VM_Version::Cipher::_AES192_dataBlk); 1681 } 1682 __ z_bre(fCode_set); // keyLen == 52: AES192 1683 1684 __ z_lghi(fCode, VM_Version::Cipher::_AES256 + mode); 1685 if (!identical_dataBlk_len) { 1686 __ z_lghi(srclen, VM_Version::Cipher::_AES256_dataBlk); 1687 } 1688 // __ z_brh(fCode_set); // keyLen < 52: AES128 // fallthru 1689 1690 __ bind(fCode_set); 1691 if (identical_dataBlk_len) { 1692 __ z_lghi(srclen, VM_Version::Cipher::_AES128_dataBlk); 1693 } 1694 } 1695 BLOCK_COMMENT("} Set fCode"); 1696 } 1697 1698 // Push a parameter block for the cipher/decipher instruction on the stack. 1699 // Layout of the additional stack space allocated for AES_cipherBlockChaining: 1700 // 1701 // | | 1702 // +--------+ <-- SP before expansion 1703 // | | 1704 // : : alignment loss (part 2), 0..(AES_parmBlk_align-1) bytes 1705 // | | 1706 // +--------+ 1707 // | | 1708 // : : space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_C 1709 // | | 1710 // +--------+ <-- parmBlk, octoword-aligned, start of parameter block 1711 // | | 1712 // : : additional stack space for spills etc., size AES_parmBlk_addspace, DW @ Z_SP not usable!!! 1713 // | | 1714 // +--------+ <-- Z_SP + alignment loss, octoword-aligned 1715 // | | 1716 // : : alignment loss (part 1), 0..(AES_parmBlk_align-1) bytes. DW @ Z_SP not usable!!! 1717 // | | 1718 // +--------+ <-- Z_SP after expansion 1719 1720 void generate_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode, 1721 Register parmBlk, Register keylen, Register fCode, Register cv, Register key) { 1722 1723 AES_parmBlk_addspace = AES_parmBlk_align; // Must be multiple of AES_parmblk_align. 1724 // spill space for regs etc., don't use DW @SP! 1725 const int cv_len = dataBlk_len; 1726 const int key_len = parmBlk_len - cv_len; 1727 // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize. 1728 // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space. 1729 const int resize_len = cv_len + key_len + AES_parmBlk_align + AES_parmBlk_addspace; 1730 1731 // Use parmBlk as temp reg here to hold the frame pointer. 1732 __ resize_frame(-resize_len, parmBlk, true); 1733 1734 // calculate parmBlk address from updated (resized) SP. 1735 __ add2reg(parmBlk, resize_len - (cv_len + key_len), Z_SP); 1736 __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. 1737 1738 // There is room for stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk). 1739 __ z_stg(keylen, -8, parmBlk); // Spill keylen for later use. 1740 1741 // calculate (SP before resize) from updated SP. 1742 __ add2reg(keylen, resize_len, Z_SP); // keylen holds prev SP for now. 1743 __ z_stg(keylen, -16, parmBlk); // Spill prev SP for easy revert. 1744 1745 __ z_mvc(0, cv_len-1, parmBlk, 0, cv); // Copy cv. 1746 __ z_mvc(cv_len, key_len-1, parmBlk, 0, key); // Copy key. 1747 __ z_lghi(fCode, crypto_fCode); 1748 } 1749 1750 // NOTE: 1751 // Before returning, the stub has to copy the chaining value from 1752 // the parmBlk, where it was updated by the crypto instruction, back 1753 // to the chaining value array the address of which was passed in the cv argument. 
1754   // As all the available registers are used and modified by KMC, we need to save
1755   // the key length across the KMC instruction. We do so by spilling it to the stack,
1756   // just preceding the parmBlk (at (parmBlk - 8)).
1757   void generate_push_parmBlk(Register keylen, Register fCode, Register parmBlk, Register key, Register cv, bool is_decipher) {
1758     int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher;
1759     Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set;
1760
1761     BLOCK_COMMENT("push parmBlk {");
1762     // We have just three cipher strengths, which translate into three
1763     // possible expanded key lengths: 44, 52, and 60 four-byte words.
1764     // We therefore can compare the actual length against the "middle" length
1765     // and get: lt -> len=44, eq -> len=52, gt -> len=60.
1766     __ z_cghi(keylen, 52);
1767     if (VM_Version::has_Crypto_AES128()) { __ z_brl(parmBlk_128); }  // keyLen <  52: AES128
1768     if (VM_Version::has_Crypto_AES192()) { __ z_bre(parmBlk_192); }  // keyLen == 52: AES192
1769     if (VM_Version::has_Crypto_AES256()) { __ z_brh(parmBlk_256); }  // keyLen >  52: AES256
1770
1771     // Security net: requested AES function not available on this CPU.
1772     // NOTE:
1773     // As of now (March 2015), this safety net is not required. JCE policy files limit the
1774     // cryptographic strength of the keys used to 128 bit. If we have AES hardware support
1775     // at all, we have at least AES-128.
1776     __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAES as remedy.", 0);
1777
1778     if (VM_Version::has_Crypto_AES256()) {
1779       __ bind(parmBlk_256);
1780       generate_push_Block(VM_Version::Cipher::_AES256_dataBlk,
1781                           VM_Version::Cipher::_AES256_parmBlk_C,
1782                           VM_Version::Cipher::_AES256 + mode,
1783                           parmBlk, keylen, fCode, cv, key);
1784       if (VM_Version::has_Crypto_AES128() || VM_Version::has_Crypto_AES192()) {
1785         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1786       }
1787     }
1788
1789     if (VM_Version::has_Crypto_AES192()) {
1790       __ bind(parmBlk_192);
1791       generate_push_Block(VM_Version::Cipher::_AES192_dataBlk,
1792                           VM_Version::Cipher::_AES192_parmBlk_C,
1793                           VM_Version::Cipher::_AES192 + mode,
1794                           parmBlk, keylen, fCode, cv, key);
1795       if (VM_Version::has_Crypto_AES128()) {
1796         __ z_bru(parmBlk_set);  // Fallthru otherwise.
1797       }
1798     }
1799
1800     if (VM_Version::has_Crypto_AES128()) {
1801       __ bind(parmBlk_128);
1802       generate_push_Block(VM_Version::Cipher::_AES128_dataBlk,
1803                           VM_Version::Cipher::_AES128_parmBlk_C,
1804                           VM_Version::Cipher::_AES128 + mode,
1805                           parmBlk, keylen, fCode, cv, key);
1806       // Fallthru
1807     }
1808
1809     __ bind(parmBlk_set);
1810     BLOCK_COMMENT("} push parmBlk");
1811   }
1812
1813   // Pop a parameter block from the stack. The chaining value portion of the parameter block
1814   // is copied back to the cv array as it is needed for subsequent cipher steps.
1815   // The keylen value as well as the original SP (before resizing) were pushed to the stack
1816   // when pushing the parameter block.
1817   void generate_pop_parmBlk(Register keylen, Register parmBlk, Register key, Register cv) {
1818
1819     BLOCK_COMMENT("pop parmBlk {");
1820     bool identical_dataBlk_len = (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES192_dataBlk) &&
1821                                  (VM_Version::Cipher::_AES128_dataBlk == VM_Version::Cipher::_AES256_dataBlk);
1822     if (identical_dataBlk_len) {
1823       int cv_len = VM_Version::Cipher::_AES128_dataBlk;
1824       __ z_mvc(0, cv_len-1, cv, 0, parmBlk);  // Copy cv.
1825 } else { 1826 int cv_len; 1827 Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set; 1828 __ z_lg(keylen, -8, parmBlk); // restore keylen 1829 __ z_cghi(keylen, 52); 1830 if (VM_Version::has_Crypto_AES256()) __ z_brh(parmBlk_256); // keyLen > 52: AES256 1831 if (VM_Version::has_Crypto_AES192()) __ z_bre(parmBlk_192); // keyLen == 52: AES192 1832 // if (VM_Version::has_Crypto_AES128()) __ z_brl(parmBlk_128); // keyLen < 52: AES128 // fallthru 1833 1834 // Security net: there is no one here. If we would need it, we should have 1835 // fallen into it already when pushing the parameter block. 1836 if (VM_Version::has_Crypto_AES128()) { 1837 __ bind(parmBlk_128); 1838 cv_len = VM_Version::Cipher::_AES128_dataBlk; 1839 __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv. 1840 if (VM_Version::has_Crypto_AES192() || VM_Version::has_Crypto_AES256()) { 1841 __ z_bru(parmBlk_set); 1842 } 1843 } 1844 1845 if (VM_Version::has_Crypto_AES192()) { 1846 __ bind(parmBlk_192); 1847 cv_len = VM_Version::Cipher::_AES192_dataBlk; 1848 __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv. 1849 if (VM_Version::has_Crypto_AES256()) { 1850 __ z_bru(parmBlk_set); 1851 } 1852 } 1853 1854 if (VM_Version::has_Crypto_AES256()) { 1855 __ bind(parmBlk_256); 1856 cv_len = VM_Version::Cipher::_AES256_dataBlk; 1857 __ z_mvc(0, cv_len-1, cv, 0, parmBlk); // Copy cv. 1858 // __ z_bru(parmBlk_set); // fallthru 1859 } 1860 __ bind(parmBlk_set); 1861 } 1862 __ z_lg(Z_SP, -16, parmBlk); // Revert resize_frame_absolute. Z_SP saved by push_parmBlk. 1863 BLOCK_COMMENT("} pop parmBlk"); 1864 } 1865 1866 // Compute AES encrypt/decrypt function. 1867 void generate_AES_cipherBlock(bool is_decipher) { 1868 // Incoming arguments. 1869 Register from = Z_ARG1; // source byte array 1870 Register to = Z_ARG2; // destination byte array 1871 Register key = Z_ARG3; // expanded key array 1872 1873 const Register keylen = Z_R0; // Temporarily (until fCode is set) holds the expanded key array length. 1874 1875 // Register definitions as required by KM instruction. 1876 const Register fCode = Z_R0; // crypto function code 1877 const Register parmBlk = Z_R1; // parameter block address (points to crypto key) 1878 const Register src = Z_ARG1; // Must be even reg (KM requirement). 1879 const Register srclen = Z_ARG2; // Must be odd reg and pair with src. Overwrites destination address. 1880 const Register dst = Z_ARG3; // Must be even reg (KM requirement). Overwrites expanded key address. 1881 1882 // Read key len of expanded key (in 4-byte words). 1883 __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 1884 1885 // Copy arguments to registers as required by crypto instruction. 1886 __ z_lgr(parmBlk, key); // crypto key (in T_INT array). 1887 __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical. 1888 __ z_lgr(dst, to); // Copy dst address, even register required. 1889 1890 // Construct function code into fCode(Z_R0), data block length into srclen(Z_ARG2). 1891 generate_load_AES_fCode(keylen, fCode, srclen, is_decipher); 1892 1893 __ km(dst, src); // Cipher the message. 1894 1895 __ z_br(Z_R14); 1896 } 1897 1898 // Compute AES encrypt function. 1899 address generate_AES_encryptBlock() { 1900 __ align(CodeEntryAlignment); 1901 StubGenStubId stub_id = StubGenStubId::aescrypt_encryptBlock_id; 1902 StubCodeMark mark(this, stub_id); 1903 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 
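    // Encrypt and decrypt share the same code generator; only the cipher/decipher
    // function code modifier (VM_Version::CipherMode), selected via is_decipher, differs.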
1904 1905 generate_AES_cipherBlock(false); 1906 1907 return __ addr_at(start_off); 1908 } 1909 1910 // Compute AES decrypt function. 1911 address generate_AES_decryptBlock() { 1912 __ align(CodeEntryAlignment); 1913 StubGenStubId stub_id = StubGenStubId::aescrypt_decryptBlock_id; 1914 StubCodeMark mark(this, stub_id); 1915 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 1916 1917 generate_AES_cipherBlock(true); 1918 1919 return __ addr_at(start_off); 1920 } 1921 1922 // These stubs receive the addresses of the cryptographic key and of the chaining value as two separate 1923 // arguments (registers "key" and "cv", respectively). The KMC instruction, on the other hand, requires 1924 // chaining value and key to be, in this sequence, adjacent in storage. Thus, we need to allocate some 1925 // thread-local working storage. Using heap memory incurs all the hassles of allocating/freeing. 1926 // Stack space, on the contrary, is deallocated automatically when we return from the stub to the caller. 1927 // *** WARNING *** 1928 // Please note that we do not formally allocate stack space, nor do we 1929 // update the stack pointer. Therefore, no function calls are allowed 1930 // and nobody else must use the stack range where the parameter block 1931 // is located. 1932 // We align the parameter block to the next available octoword. 1933 // 1934 // Compute chained AES encrypt function. 1935 void generate_AES_cipherBlockChaining(bool is_decipher) { 1936 1937 Register from = Z_ARG1; // source byte array (clear text) 1938 Register to = Z_ARG2; // destination byte array (ciphered) 1939 Register key = Z_ARG3; // expanded key array. 1940 Register cv = Z_ARG4; // chaining value 1941 const Register msglen = Z_ARG5; // Total length of the msg to be encrypted. Value must be returned 1942 // in Z_RET upon completion of this stub. Is 32-bit integer. 1943 1944 const Register keylen = Z_R0; // Expanded key length, as read from key array. Temp only. 1945 const Register fCode = Z_R0; // crypto function code 1946 const Register parmBlk = Z_R1; // parameter block address (points to crypto key) 1947 const Register src = Z_ARG1; // is Z_R2 1948 const Register srclen = Z_ARG2; // Overwrites destination address. 1949 const Register dst = Z_ARG3; // Overwrites key address. 1950 1951 // Read key len of expanded key (in 4-byte words). 1952 __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 1953 1954 // Construct parm block address in parmBlk (== Z_R1), copy cv and key to parm block. 1955 // Construct function code in fCode (Z_R0). 1956 generate_push_parmBlk(keylen, fCode, parmBlk, key, cv, is_decipher); 1957 1958 // Prepare other registers for instruction. 1959 __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical. 1960 __ z_lgr(dst, to); 1961 __ z_llgfr(srclen, msglen); // We pass the offsets as ints, not as longs as required. 1962 1963 __ kmc(dst, src); // Cipher the message. 1964 1965 generate_pop_parmBlk(keylen, parmBlk, key, cv); 1966 1967 __ z_llgfr(Z_RET, msglen); // We pass the offsets as ints, not as longs as required. 1968 __ z_br(Z_R14); 1969 } 1970 1971 // Compute chained AES encrypt function. 
1972 address generate_cipherBlockChaining_AES_encrypt() { 1973 __ align(CodeEntryAlignment); 1974 StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_encryptAESCrypt_id; 1975 StubCodeMark mark(this, stub_id); 1976 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 1977 1978 generate_AES_cipherBlockChaining(false); 1979 1980 return __ addr_at(start_off); 1981 } 1982 1983 // Compute chained AES decrypt function. 1984 address generate_cipherBlockChaining_AES_decrypt() { 1985 __ align(CodeEntryAlignment); 1986 StubGenStubId stub_id = StubGenStubId::cipherBlockChaining_decryptAESCrypt_id; 1987 StubCodeMark mark(this, stub_id); 1988 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 1989 1990 generate_AES_cipherBlockChaining(true); 1991 1992 return __ addr_at(start_off); 1993 } 1994 1995 1996 // ***************************************************************************** 1997 1998 // AES CounterMode 1999 // Push a parameter block for the cipher/decipher instruction on the stack. 2000 // Layout of the additional stack space allocated for counterMode_AES_cipherBlock 2001 // 2002 // | | 2003 // +--------+ <-- SP before expansion 2004 // | | 2005 // : : alignment loss (part 2), 0..(AES_parmBlk_align-1) bytes. 2006 // | | 2007 // +--------+ <-- gap = parmBlk + parmBlk_len + ctrArea_len 2008 // | | 2009 // : : byte[] ctr - kmctr expects a counter vector the size of the input vector. 2010 // : : The interface only provides byte[16] iv, the init vector. 2011 // : : The size of this area is a tradeoff between stack space, init effort, and speed. 2012 // | | Each counter is a 128bit int. Vector element [0] is a copy of iv. 2013 // | | Vector element [i] is formed by incrementing element [i-1]. 2014 // +--------+ <-- ctr = parmBlk + parmBlk_len 2015 // | | 2016 // : : space for parameter block, size VM_Version::Cipher::_AES*_parmBlk_G 2017 // | | 2018 // +--------+ <-- parmBlk = Z_SP + (alignment loss (part 1+2)) + AES_dataBlk_space + AES_parmBlk_addSpace, octoword-aligned, start of parameter block 2019 // | | 2020 // : : additional stack space for spills etc., min. size AES_parmBlk_addspace, all bytes usable. 2021 // | | 2022 // +--------+ <-- Z_SP + alignment loss (part 1+2) + AES_dataBlk_space, octoword-aligned 2023 // | | 2024 // : : space for one source data block and one dest data block. 2025 // | | 2026 // +--------+ <-- Z_SP + alignment loss (part 1+2), octoword-aligned 2027 // | | 2028 // : : additional alignment loss. Blocks above can't tolerate unusable DW @SP. 2029 // | | 2030 // +--------+ <-- Z_SP + alignment loss (part 1), octoword-aligned 2031 // | | 2032 // : : alignment loss (part 1), 0..(AES_parmBlk_align-1) bytes. DW @ Z_SP holds frame ptr. 2033 // | | 2034 // +--------+ <-- Z_SP after expansion 2035 // 2036 // additional space allocation (per DW): 2037 // spillSpace = parmBlk - AES_parmBlk_addspace 2038 // dataBlocks = spillSpace - AES_dataBlk_space 2039 // 2040 // parmBlk-8 various fields of various lengths 2041 // parmBlk-1: key_len (only one byte is stored at parmBlk-1) 2042 // parmBlk-2: fCode (only one byte is stored at parmBlk-2) 2043 // parmBlk-4: ctrVal_len (as retrieved from iv array), in bytes, as HW 2044 // parmBlk-8: msglen length (in bytes) of crypto msg, as passed in by caller 2045 // return value is calculated from this: rv = msglen - processed. 
2046 // parmBlk-16 old_SP (SP before resize) 2047 // parmBlk-24 temp values 2048 // up to and including main loop in generate_counterMode_AES 2049 // - parmBlk-20: remmsg_len remaining msg len (aka unprocessed msg bytes) 2050 // after main loop in generate_counterMode_AES 2051 // - parmBlk-24: spill slot for various address values 2052 // 2053 // parmBlk-40 free spill slot, used for local spills. 2054 // parmBlk-64 ARG2(dst) ptr spill slot 2055 // parmBlk-56 ARG3(crypto key) ptr spill slot 2056 // parmBlk-48 ARG4(icv value) ptr spill slot 2057 // 2058 // parmBlk-72 2059 // parmBlk-80 2060 // parmBlk-88 counter vector current position 2061 // parmBlk-96 reduced msg len (after preLoop processing) 2062 // 2063 // parmBlk-104 Z_R13 spill slot (preLoop only) 2064 // parmBlk-112 Z_R12 spill slot (preLoop only) 2065 // parmBlk-120 Z_R11 spill slot (preLoop only) 2066 // parmBlk-128 Z_R10 spill slot (preLoop only) 2067 // 2068 // 2069 // Layout of the parameter block (instruction KMCTR, function KMCTR-AES* 2070 // 2071 // +--------+ key_len: +16 (AES-128), +24 (AES-192), +32 (AES-256) 2072 // | | 2073 // | | cryptographic key 2074 // | | 2075 // +--------+ <-- parmBlk 2076 // 2077 // On exit: 2078 // Z_SP points to resized frame 2079 // Z_SP before resize available from -16(parmBlk) 2080 // parmBlk points to crypto instruction parameter block 2081 // parameter block is filled with crypto key. 2082 // msglen unchanged, saved for later at -24(parmBlk) 2083 // fCode contains function code for instruction 2084 // key unchanged 2085 // 2086 void generate_counterMode_prepare_Stack(Register parmBlk, Register ctr, Register counter, Register scratch) { 2087 2088 BLOCK_COMMENT("prepare stack counterMode_AESCrypt {"); 2089 2090 // save argument registers. 2091 // ARG1(from) is Z_RET as well. Not saved or restored. 2092 // ARG5(msglen) is restored by other means. 2093 __ z_stmg(Z_ARG2, Z_ARG4, argsave_offset, parmBlk); 2094 2095 assert(AES_ctrVec_len > 0, "sanity. We need a counter vector"); 2096 __ add2reg(counter, AES_parmBlk_align, parmBlk); // counter array is located behind crypto key. Available range is disp12 only. 2097 __ z_mvc(0, AES_ctrVal_len-1, counter, 0, ctr); // move first copy of iv 2098 for (int j = 1; j < AES_ctrVec_len; j+=j) { // j (and amount of moved data) doubles with every iteration 2099 int offset = j * AES_ctrVal_len; 2100 if (offset <= 256) { 2101 __ z_mvc(offset, offset-1, counter, 0, counter); // move iv 2102 } else { 2103 for (int k = 0; k < offset; k += 256) { 2104 __ z_mvc(offset+k, 255, counter, 0, counter); 2105 } 2106 } 2107 } 2108 2109 Label noCarry, done; 2110 __ z_lg(scratch, Address(ctr, 8)); // get low-order DW of initial counter. 2111 __ z_algfi(scratch, AES_ctrVec_len); // check if we will overflow during init. 2112 __ z_brc(Assembler::bcondLogNoCarry, noCarry); // No, 64-bit increment is sufficient. 2113 2114 for (int j = 1; j < AES_ctrVec_len; j++) { // start with j = 1; no need to add 0 to the first counter value. 2115 int offset = j * AES_ctrVal_len; 2116 generate_increment128(counter, offset, j, scratch); // increment iv by index value 2117 } 2118 __ z_bru(done); 2119 2120 __ bind(noCarry); 2121 for (int j = 1; j < AES_ctrVec_len; j++) { // start with j = 1; no need to add 0 to the first counter value. 
2122 int offset = j * AES_ctrVal_len; 2123 generate_increment64(counter, offset, j); // increment iv by index value 2124 } 2125 2126 __ bind(done); 2127 2128 BLOCK_COMMENT("} prepare stack counterMode_AESCrypt"); 2129 } 2130 2131 2132 void generate_counterMode_increment_ctrVector(Register parmBlk, Register counter, Register scratch, bool v0_only) { 2133 2134 BLOCK_COMMENT("increment ctrVector counterMode_AESCrypt {"); 2135 2136 __ add2reg(counter, AES_parmBlk_align, parmBlk); // ptr to counter array needs to be restored 2137 2138 if (v0_only) { 2139 int offset = 0; 2140 generate_increment128(counter, offset, AES_ctrVec_len, scratch); // increment iv by # vector elements 2141 } else { 2142 int j = 0; 2143 if (VM_Version::has_VectorFacility()) { 2144 bool first_call = true; 2145 for (; j < (AES_ctrVec_len - 3); j+=4) { // increment blocks of 4 iv elements 2146 int offset = j * AES_ctrVal_len; 2147 generate_increment128x4(counter, offset, AES_ctrVec_len, first_call); 2148 first_call = false; 2149 } 2150 } 2151 for (; j < AES_ctrVec_len; j++) { 2152 int offset = j * AES_ctrVal_len; 2153 generate_increment128(counter, offset, AES_ctrVec_len, scratch); // increment iv by # vector elements 2154 } 2155 } 2156 2157 BLOCK_COMMENT("} increment ctrVector counterMode_AESCrypt"); 2158 } 2159 2160 // IBM s390 (IBM z/Architecture, to be more exact) uses Big-Endian number representation. 2161 // Therefore, the bits are ordered from most significant to least significant. The address 2162 // of a number in memory points to its lowest location where the most significant bit is stored. 2163 void generate_increment64(Register counter, int offset, int increment) { 2164 __ z_algsi(offset + 8, counter, increment); // increment, no overflow check 2165 } 2166 2167 void generate_increment128(Register counter, int offset, int increment, Register scratch) { 2168 __ clear_reg(scratch); // prepare to add carry to high-order DW 2169 __ z_algsi(offset + 8, counter, increment); // increment low order DW 2170 __ z_alcg(scratch, Address(counter, offset)); // add carry to high-order DW 2171 __ z_stg(scratch, Address(counter, offset)); // store back 2172 } 2173 2174 void generate_increment128(Register counter, int offset, Register increment, Register scratch) { 2175 __ clear_reg(scratch); // prepare to add carry to high-order DW 2176 __ z_alg(increment, Address(counter, offset + 8)); // increment low order DW 2177 __ z_stg(increment, Address(counter, offset + 8)); // store back 2178 __ z_alcg(scratch, Address(counter, offset)); // add carry to high-order DW 2179 __ z_stg(scratch, Address(counter, offset)); // store back 2180 } 2181 2182 // This is the vector variant of increment128, incrementing 4 ctr vector elements per call. 2183 void generate_increment128x4(Register counter, int offset, int increment, bool init) { 2184 VectorRegister Vincr = Z_V16; 2185 VectorRegister Vctr0 = Z_V20; 2186 VectorRegister Vctr1 = Z_V21; 2187 VectorRegister Vctr2 = Z_V22; 2188 VectorRegister Vctr3 = Z_V23; 2189 2190 // Initialize the increment value only once for a series of increments. 2191 // It must be assured that the non-initializing generator calls are 2192 // immediately subsequent. Otherwise, there is no guarantee for Vincr to be unchanged. 
2193 if (init) { 2194 __ z_vzero(Vincr); // preset VReg with constant increment 2195 __ z_vleih(Vincr, increment, 7); // rightmost HW has ix = 7 2196 } 2197 2198 __ z_vlm(Vctr0, Vctr3, offset, counter); // get the counter values 2199 __ z_vaq(Vctr0, Vctr0, Vincr); // increment them 2200 __ z_vaq(Vctr1, Vctr1, Vincr); 2201 __ z_vaq(Vctr2, Vctr2, Vincr); 2202 __ z_vaq(Vctr3, Vctr3, Vincr); 2203 __ z_vstm(Vctr0, Vctr3, offset, counter); // store the counter values 2204 } 2205 2206 unsigned int generate_counterMode_push_Block(int dataBlk_len, int parmBlk_len, int crypto_fCode, 2207 Register parmBlk, Register msglen, Register fCode, Register key) { 2208 2209 // space for data blocks (src and dst, one each) for partial block processing) 2210 AES_parmBlk_addspace = AES_stackSpace_incr // spill space (temp data) 2211 + AES_stackSpace_incr // for argument save/restore 2212 + AES_stackSpace_incr*2 // for work reg save/restore 2213 ; 2214 AES_dataBlk_space = roundup(2*dataBlk_len, AES_parmBlk_align); 2215 AES_dataBlk_offset = -(AES_parmBlk_addspace+AES_dataBlk_space); 2216 const int key_len = parmBlk_len; // The length of the unextended key (16, 24, 32) 2217 2218 assert((AES_ctrVal_len == 0) || (AES_ctrVal_len == dataBlk_len), "varying dataBlk_len is not supported."); 2219 AES_ctrVal_len = dataBlk_len; // ctr init value len (in bytes) 2220 AES_ctrArea_len = AES_ctrVec_len * AES_ctrVal_len; // space required on stack for ctr vector 2221 2222 // This len must be known at JIT compile time. Only then are we able to recalc the SP before resize. 2223 // We buy this knowledge by wasting some (up to AES_parmBlk_align) bytes of stack space. 2224 const int resize_len = AES_parmBlk_align // room for alignment of parmBlk 2225 + AES_parmBlk_align // extra room for alignment 2226 + AES_dataBlk_space // one src and one dst data blk 2227 + AES_parmBlk_addspace // spill space for local data 2228 + roundup(parmBlk_len, AES_parmBlk_align) // aligned length of parmBlk 2229 + AES_ctrArea_len // stack space for ctr vector 2230 ; 2231 Register scratch = fCode; // We can use fCode as a scratch register. It's contents on entry 2232 // is irrelevant and it is set at the very end of this code block. 2233 2234 assert(key_len < 256, "excessive crypto key len: %d, limit: 256", key_len); 2235 2236 BLOCK_COMMENT(err_msg("push_Block (%d bytes) counterMode_AESCrypt%d {", resize_len, parmBlk_len*8)); 2237 2238 // After the frame is resized, the parmBlk is positioned such 2239 // that it is octoword-aligned. This potentially creates some 2240 // alignment waste in addspace and/or in the gap area. 2241 // After resize_frame, scratch contains the frame pointer. 2242 __ resize_frame(-resize_len, scratch, true); 2243 #ifdef ASSERT 2244 __ clear_mem(Address(Z_SP, (intptr_t)8), resize_len - 8); 2245 #endif 2246 2247 // calculate aligned parmBlk address from updated (resized) SP. 2248 __ add2reg(parmBlk, AES_parmBlk_addspace + AES_dataBlk_space + (2*AES_parmBlk_align-1), Z_SP); 2249 __ z_nill(parmBlk, (~(AES_parmBlk_align-1)) & 0xffff); // Align parameter block. 2250 2251 // There is room to spill stuff in the range [parmBlk-AES_parmBlk_addspace+8, parmBlk). 2252 __ z_mviy(keylen_offset, parmBlk, key_len - 1); // Spill crypto key length for later use. Decrement by one for direct use with xc template. 2253 __ z_mviy(fCode_offset, parmBlk, crypto_fCode); // Crypto function code, will be loaded into Z_R0 later. 2254 __ z_sty(msglen, msglen_offset, parmBlk); // full plaintext/ciphertext len. 
2255 __ z_sty(msglen, msglen_red_offset, parmBlk); // save for main loop, may get updated in preLoop. 2256 __ z_sra(msglen, exact_log2(dataBlk_len)); // # full cipher blocks that can be formed from input text. 2257 __ z_sty(msglen, rem_msgblk_offset, parmBlk); 2258 2259 __ add2reg(scratch, resize_len, Z_SP); // calculate (SP before resize) from resized SP. 2260 __ z_stg(scratch, unextSP_offset, parmBlk); // Spill unextended SP for easy revert. 2261 __ z_stmg(Z_R10, Z_R13, regsave_offset, parmBlk); // make some regs available as work registers 2262 2263 // Fill parmBlk with all required data 2264 __ z_mvc(0, key_len-1, parmBlk, 0, key); // Copy key. Need to do it here - key_len is only known here. 2265 BLOCK_COMMENT(err_msg("} push_Block (%d bytes) counterMode_AESCrypt%d", resize_len, parmBlk_len*8)); 2266 return resize_len; 2267 } 2268 2269 2270 void generate_counterMode_pop_Block(Register parmBlk, Register msglen, Label& eraser) { 2271 // For added safety, clear the stack area where the crypto key was stored. 2272 Register scratch = msglen; 2273 assert_different_registers(scratch, Z_R0); // can't use Z_R0 for exrl. 2274 2275 // wipe out key on stack 2276 __ z_llgc(scratch, keylen_offset, parmBlk); // get saved (key_len-1) value (we saved just one byte!) 2277 __ z_exrl(scratch, eraser); // template relies on parmBlk still pointing to key on stack 2278 2279 // restore argument registers. 2280 // ARG1(from) is Z_RET as well. Not restored - will hold return value anyway. 2281 // ARG5(msglen) is restored further down. 2282 __ z_lmg(Z_ARG2, Z_ARG4, argsave_offset, parmBlk); 2283 2284 // restore work registers 2285 __ z_lmg(Z_R10, Z_R13, regsave_offset, parmBlk); // make some regs available as work registers 2286 2287 __ z_lgf(msglen, msglen_offset, parmBlk); // Restore msglen, only low order FW is valid 2288 #ifdef ASSERT 2289 { 2290 Label skip2last, skip2done; 2291 // Z_RET (aka Z_R2) can be used as scratch as well. It will be set from msglen before return. 2292 __ z_lgr(Z_RET, Z_SP); // save extended SP 2293 __ z_lg(Z_SP, unextSP_offset, parmBlk); // trim stack back to unextended size 2294 __ z_sgrk(Z_R1, Z_SP, Z_RET); 2295 2296 __ z_cghi(Z_R1, 256); 2297 __ z_brl(skip2last); 2298 __ z_xc(0, 255, Z_RET, 0, Z_RET); 2299 __ z_aghi(Z_RET, 256); 2300 __ z_aghi(Z_R1, -256); 2301 2302 __ z_cghi(Z_R1, 256); 2303 __ z_brl(skip2last); 2304 __ z_xc(0, 255, Z_RET, 0, Z_RET); 2305 __ z_aghi(Z_RET, 256); 2306 __ z_aghi(Z_R1, -256); 2307 2308 __ z_cghi(Z_R1, 256); 2309 __ z_brl(skip2last); 2310 __ z_xc(0, 255, Z_RET, 0, Z_RET); 2311 __ z_aghi(Z_RET, 256); 2312 __ z_aghi(Z_R1, -256); 2313 2314 __ bind(skip2last); 2315 __ z_lgr(Z_R0, Z_RET); 2316 __ z_aghik(Z_RET, Z_R1, -1); // decrement for exrl 2317 __ z_brl(skip2done); 2318 __ z_lgr(parmBlk, Z_R0); // parmBlk == Z_R1, used in eraser template 2319 __ z_exrl(Z_RET, eraser); 2320 2321 __ bind(skip2done); 2322 } 2323 #else 2324 __ z_lg(Z_SP, unextSP_offset, parmBlk); // trim stack back to unextended size 2325 #endif 2326 } 2327 2328 2329 int generate_counterMode_push_parmBlk(Register parmBlk, Register msglen, Register fCode, Register key, bool is_decipher) { 2330 int resize_len = 0; 2331 int mode = is_decipher ? VM_Version::CipherMode::decipher : VM_Version::CipherMode::cipher; 2332 Label parmBlk_128, parmBlk_192, parmBlk_256, parmBlk_set; 2333 Register keylen = fCode; // Expanded key length, as read from key array, Temp only. 2334 // use fCode as scratch; fCode receives its final value later. 2335 2336 // Read key len of expanded key (in 4-byte words). 
2337 __ z_lgf(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); 2338 __ z_cghi(keylen, 52); 2339 if (VM_Version::has_Crypto_AES_CTR256()) { __ z_brh(parmBlk_256); } // keyLen > 52: AES256. Assume: most frequent 2340 if (VM_Version::has_Crypto_AES_CTR128()) { __ z_brl(parmBlk_128); } // keyLen < 52: AES128. 2341 if (VM_Version::has_Crypto_AES_CTR192()) { __ z_bre(parmBlk_192); } // keyLen == 52: AES192. Assume: least frequent 2342 2343 // Safety net: requested AES_CTR function for requested keylen not available on this CPU. 2344 __ stop_static("AES key strength not supported by CPU. Use -XX:-UseAESCTRIntrinsics as remedy.", 0); 2345 2346 if (VM_Version::has_Crypto_AES_CTR128()) { 2347 __ bind(parmBlk_128); 2348 resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES128_dataBlk, 2349 VM_Version::Cipher::_AES128_parmBlk_G, 2350 VM_Version::Cipher::_AES128 + mode, 2351 parmBlk, msglen, fCode, key); 2352 if (VM_Version::has_Crypto_AES_CTR256() || VM_Version::has_Crypto_AES_CTR192()) { 2353 __ z_bru(parmBlk_set); // Fallthru otherwise. 2354 } 2355 } 2356 2357 if (VM_Version::has_Crypto_AES_CTR192()) { 2358 __ bind(parmBlk_192); 2359 resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES192_dataBlk, 2360 VM_Version::Cipher::_AES192_parmBlk_G, 2361 VM_Version::Cipher::_AES192 + mode, 2362 parmBlk, msglen, fCode, key); 2363 if (VM_Version::has_Crypto_AES_CTR256()) { 2364 __ z_bru(parmBlk_set); // Fallthru otherwise. 2365 } 2366 } 2367 2368 if (VM_Version::has_Crypto_AES_CTR256()) { 2369 __ bind(parmBlk_256); 2370 resize_len = generate_counterMode_push_Block(VM_Version::Cipher::_AES256_dataBlk, 2371 VM_Version::Cipher::_AES256_parmBlk_G, 2372 VM_Version::Cipher::_AES256 + mode, 2373 parmBlk, msglen, fCode, key); 2374 // Fallthru 2375 } 2376 2377 __ bind(parmBlk_set); 2378 return resize_len; 2379 } 2380 2381 2382 void generate_counterMode_pop_parmBlk(Register parmBlk, Register msglen, Label& eraser) { 2383 2384 BLOCK_COMMENT("pop parmBlk counterMode_AESCrypt {"); 2385 2386 generate_counterMode_pop_Block(parmBlk, msglen, eraser); 2387 2388 BLOCK_COMMENT("} pop parmBlk counterMode_AESCrypt"); 2389 } 2390 2391 // Implementation of counter-mode AES encrypt/decrypt function. 2392 // 2393 void generate_counterMode_AES_impl(bool is_decipher) { 2394 2395 // On entry: 2396 // if there was a previous call to update(), and this previous call did not fully use 2397 // the current encrypted counter, that counter is available at arg6_Offset(Z_SP). 2398 // The index of the first unused bayte in the encrypted counter is available at arg7_Offset(Z_SP). 2399 // The index is in the range [1..AES_ctrVal_len] ([1..16]), where index == 16 indicates a fully 2400 // used previous encrypted counter. 2401 // The unencrypted counter has already been incremented and is ready to be used for the next 2402 // data block, after the unused bytes from the previous call have been consumed. 2403 // The unencrypted counter follows the "increment-after use" principle. 2404 2405 // On exit: 2406 // The index of the first unused byte of the encrypted counter is written back to arg7_Offset(Z_SP). 2407 // A value of AES_ctrVal_len (16) indicates there is no leftover byte. 2408 // If there is at least one leftover byte (1 <= index < AES_ctrVal_len), the encrypted counter value 2409 // is written back to arg6_Offset(Z_SP). If there is no leftover, nothing is written back. 2410 // The unencrypted counter value is written back after having been incremented. 
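    // The contract above, seen from the caller's side, amounts to the following
    // illustrative C sketch (not part of the stub; encCtr/cvIndex denote the
    // array/value passed via arg6/arg7):
    //
    //   int avail = AES_ctrVal_len - cvIndex;        // leftover key-stream bytes, 0 if fully used
    //   int n     = MIN2(avail, msglen);
    //   for (int i = 0; i < n; i++) {
    //     to[i] = from[i] ^ encCtr[cvIndex + i];     // CTR mode: XOR with leftover key stream
    //   }
    //   cvIndex += n;                                // written back through arg7
    //   // the remaining (msglen - n) bytes are then ciphered with fresh counter values (kmctr)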
2411
2412     Register       from    = Z_ARG1; // byte[], source byte array (clear text)
2413     Register       to      = Z_ARG2; // byte[], destination byte array (ciphered)
2414     Register       key     = Z_ARG3; // byte[], expanded key array.
2415     Register       ctr     = Z_ARG4; // byte[], counter byte array.
2416     const Register msglen  = Z_ARG5; // int, Total length of the msg to be encrypted. Value must be
2417                                      // returned in Z_RET upon completion of this stub.
2418                                      // This is a jint. Negative values are illegal, but technically possible.
2419                                      // Do not rely on the high word. Its contents are undefined.
2420                // encCtr  = Z_ARG6 - encrypted counter (byte array),
2421                //                    address passed on stack at _z_abi(remaining_cargs) + 0 * WordSize
2422                // cvIndex = Z_ARG7 - # used (consumed) bytes of encrypted counter,
2423                //                    passed on stack at _z_abi(remaining_cargs) + 1 * WordSize
2424                //                    Caution: 4-byte value, right-justified in 8-byte stack word
2425
2426     const Register fCode   = Z_R0;   // crypto function code
2427     const Register parmBlk = Z_R1;   // parameter block address (points to crypto key)
2428     const Register src     = Z_ARG1; // is Z_R2, forms even/odd pair with srclen
2429     const Register srclen  = Z_ARG2; // Overwrites destination address.
2430     const Register dst     = Z_ARG3; // Overwrites key address.
2431     const Register counter = Z_ARG5; // Overwrites msglen. Must have counter array in an even register.
2432
2433     Label srcMover, dstMover, fromMover, ctrXOR, dataEraser; // EXRL (execution) templates.
2434     Label CryptoLoop, CryptoLoop_doit, CryptoLoop_end, CryptoLoop_setupAndDoLast, CryptoLoop_ctrVal_inc;
2435     Label allDone, allDone_noInc, popAndExit, Exit;
2436
2437     int  arg6_Offset = _z_abi(remaining_cargs) + 0 * HeapWordSize;
2438     int  arg7_Offset = _z_abi(remaining_cargs) + 1 * HeapWordSize; // stack slot holds ptr to int value
2439     int oldSP_Offset = 0;
2440
2441     // Is there anything to do at all? Protect against negative len as well.
2442     __ z_ltr(msglen, msglen);
2443     __ z_brnh(Exit);
2444
2445     // Expand stack, load parm block address into parmBlk (== Z_R1), copy crypto key to parm block.
2446     oldSP_Offset = generate_counterMode_push_parmBlk(parmBlk, msglen, fCode, key, is_decipher);
2447     arg6_Offset += oldSP_Offset;
2448     arg7_Offset += oldSP_Offset;
2449
2450     // Check if there is a leftover, partially used encrypted counter from the last invocation.
2451     // If so, use those leftover counter bytes first before starting the "normal" encryption.
2452
2453     // We do not have access to the encrypted counter value. It is generated and used only
2454     // internally within the previous kmctr instruction. But, at the end of a call to this stub,
2455     // the last encrypted counter is extracted by ciphering a 0x00 byte stream. The result is
2456     // stored at the arg6 location for use with the subsequent call.
2457     //
2458     // The # of used bytes of the encrypted counter (from a previous call) is provided via arg7.
2459     // It is used as an index into the encrypted counter to access the first byte available for ciphering.
2460     // To cipher the input text, we move as many bytes as remain in the encrypted counter from
2461     // input to output. Then we simply XOR the output bytes with the associated encrypted counter bytes.
2462
2463     Register cvIxAddr  = Z_R10;                  // Address of index into encCtr. Preserved for use @CryptoLoop_end.
2464     __ z_lg(cvIxAddr, arg7_Offset, Z_SP);        // arg7: addr of field encCTR_index.
2465 2466 { 2467 Register cvUnused = Z_R11; // # unused bytes of encrypted counter value (= 16 - cvIndex) 2468 Register encCtr = Z_R12; // encrypted counter value, points to first ununsed byte. 2469 Register cvIndex = Z_R13; // # index of first unused byte of encrypted counter value 2470 Label preLoop_end; 2471 2472 // preLoop is necessary only if there is a partially used encrypted counter (encCtr). 2473 // Partially used means cvIndex is in [1, dataBlk_len-1]. 2474 // cvIndex == 0: encCtr is set up but not used at all. Should not occur. 2475 // cvIndex == dataBlk_len: encCtr is exhausted, all bytes used. 2476 // Using unsigned compare protects against cases where (cvIndex < 0). 2477 __ z_clfhsi(0, cvIxAddr, AES_ctrVal_len); // check #used bytes in encCtr against ctr len. 2478 __ z_brnl(preLoop_end); // if encCtr is fully used, skip to normal processing. 2479 __ z_ltgf(cvIndex, 0, Z_R0, cvIxAddr); // # used bytes in encCTR. 2480 __ z_brz(preLoop_end); // if encCtr has no used bytes, skip to normal processing. 2481 2482 __ z_lg(encCtr, arg6_Offset, Z_SP); // encrypted counter from last call to update() 2483 __ z_agr(encCtr, cvIndex); // now points to first unused byte 2484 2485 __ add2reg(cvUnused, -AES_ctrVal_len, cvIndex); // calculate #unused bytes in encCtr. 2486 __ z_lcgr(cvUnused, cvUnused); // previous checks ensure cvUnused in range [1, dataBlk_len-1] 2487 2488 __ z_lgf(msglen, msglen_offset, parmBlk); // Restore msglen (jint value) 2489 __ z_cr(cvUnused, msglen); // check if msg can consume all unused encCtr bytes 2490 __ z_locr(cvUnused, msglen, Assembler::bcondHigh); // take the shorter length 2491 __ z_aghi(cvUnused, -1); // decrement # unused bytes by 1 for exrl instruction 2492 // preceding checks ensure cvUnused in range [1, dataBlk_len-1] 2493 __ z_exrl(cvUnused, fromMover); 2494 __ z_exrl(cvUnused, ctrXOR); 2495 2496 __ z_aghi(cvUnused, 1); // revert decrement from above 2497 __ z_agr(cvIndex, cvUnused); // update index into encCtr (first unused byte) 2498 __ z_st(cvIndex, 0, cvIxAddr); // write back arg7, cvIxAddr is still valid 2499 2500 // update pointers and counters to prepare for main loop 2501 __ z_agr(from, cvUnused); 2502 __ z_agr(to, cvUnused); 2503 __ z_sr(msglen, cvUnused); // #bytes not yet processed 2504 __ z_sty(msglen, msglen_red_offset, parmBlk); // save for calculations in main loop 2505 __ z_srak(Z_R0, msglen, exact_log2(AES_ctrVal_len));// # full cipher blocks that can be formed from input text. 2506 __ z_sty(Z_R0, rem_msgblk_offset, parmBlk); 2507 2508 // check remaining msglen. If zero, all msg bytes were processed in preLoop. 2509 __ z_ltr(msglen, msglen); 2510 __ z_brnh(popAndExit); 2511 2512 __ bind(preLoop_end); 2513 } 2514 2515 // Create count vector on stack to accommodate up to AES_ctrVec_len blocks. 2516 generate_counterMode_prepare_Stack(parmBlk, ctr, counter, fCode); 2517 2518 // Prepare other registers for instruction. 2519 __ lgr_if_needed(src, from); // Copy src address. Will not emit, src/from are identical. 2520 __ z_lgr(dst, to); 2521 __ z_llgc(fCode, fCode_offset, Z_R0, parmBlk); 2522 2523 __ bind(CryptoLoop); 2524 __ z_lghi(srclen, AES_ctrArea_len); // preset len (#bytes) for next iteration: max possible. 2525 __ z_asi(rem_msgblk_offset, parmBlk, -AES_ctrVec_len); // decrement #remaining blocks (16 bytes each). 
Range: [+127..-128] 2526 __ z_brl(CryptoLoop_setupAndDoLast); // Handling the last iteration (using less than max #blocks) out-of-line 2527 2528 __ bind(CryptoLoop_doit); 2529 __ kmctr(dst, counter, src); // Cipher the message. 2530 2531 __ z_lt(srclen, rem_msgblk_offset, Z_R0, parmBlk); // check if this was the last iteration 2532 __ z_brz(CryptoLoop_ctrVal_inc); // == 0: ctrVector fully used. Need to increment the first 2533 // vector element to encrypt remaining unprocessed bytes. 2534 // __ z_brl(CryptoLoop_end); // < 0: this was detected before and handled at CryptoLoop_setupAndDoLast 2535 // > 0: this is the fallthru case, need another iteration 2536 2537 generate_counterMode_increment_ctrVector(parmBlk, counter, srclen, false); // srclen unused here (serves as scratch) 2538 __ z_bru(CryptoLoop); 2539 2540 __ bind(CryptoLoop_end); 2541 2542 // OK, when we arrive here, we have encrypted all of the "from" byte stream 2543 // except for the last few [0..dataBlk_len) bytes. In addition, we know that 2544 // there are no more unused bytes in the previously generated encrypted counter. 2545 // The (unencrypted) counter, however, is ready to use (it was incremented before). 2546 2547 // To encrypt the few remaining bytes, we need to form an extra src and dst 2548 // data block of dataBlk_len each. This is because we can only process full 2549 // blocks but we must not read or write beyond the boundaries of the argument 2550 // arrays. Here is what we do: 2551 // - The ctrVector has at least one unused element. This is ensured by CryptoLoop code. 2552 // - The (first) unused element is pointed at by the counter register. 2553 // - The src data block is filled with the remaining "from" bytes, remainder of block undefined. 2554 // - The single src data block is encrypted into the dst data block. 2555 // - The dst data block is copied into the "to" array, but only the leftmost few bytes 2556 // (as many as were left in the source byte stream). 2557 // - The counter value to be used is pointed at by the counter register. 2558 // - Fortunately, the crypto instruction (kmctr) has updated all related addresses such that 2559 // we know where to continue with "from" and "to" and which counter value to use next. 2560 2561 Register encCtr = Z_R12; // encrypted counter value, points to stub argument. 2562 Register tmpDst = Z_R12; // addr of temp destination (for last partial block encryption) 2563 2564 __ z_lgf(srclen, msglen_red_offset, parmBlk); // plaintext/ciphertext len after potential preLoop processing. 2565 __ z_nilf(srclen, AES_ctrVal_len - 1); // those rightmost bits indicate the unprocessed #bytes 2566 __ z_stg(srclen, localSpill_offset, parmBlk); // save for later reuse 2567 __ z_mvhi(0, cvIxAddr, 16); // write back arg7 (default 16 in case of allDone). 2568 __ z_braz(allDone_noInc); // no unprocessed bytes? Then we are done. 2569 // This also means the last block of data processed was 2570 // a full-sized block (AES_ctrVal_len bytes) which results 2571 // in no leftover encrypted counter bytes. 2572 __ z_st(srclen, 0, cvIxAddr); // This will be the index of the first unused byte in the encrypted counter. 2573 __ z_stg(counter, counter_offset, parmBlk); // save counter location for easy later restore 2574 2575 // calculate address (on stack) for final dst and src blocks. 2576 __ add2reg(tmpDst, AES_dataBlk_offset, parmBlk); // tmp dst (on stack) is right before tmp src 2577 2578 // We have a residue of [1..15] unprocessed bytes, srclen holds the exact number. 
2579 // Residue == 0 was checked just above, residue == AES_ctrVal_len would be another 2580 // full-sized block and would have been handled by CryptoLoop. 2581 2582 __ add2reg(srclen, -1); // decrement for exrl 2583 __ z_exrl(srclen, srcMover); // copy remaining bytes of src byte stream 2584 __ load_const_optimized(srclen, AES_ctrVal_len); // kmctr processes only complete blocks 2585 __ add2reg(src, AES_ctrVal_len, tmpDst); // tmp dst is right before tmp src 2586 2587 __ kmctr(tmpDst, counter, src); // Cipher the remaining bytes. 2588 2589 __ add2reg(tmpDst, -AES_ctrVal_len, tmpDst); // restore tmp dst address 2590 __ z_lg(srclen, localSpill_offset, parmBlk); // residual len, saved above 2591 __ add2reg(srclen, -1); // decrement for exrl 2592 __ z_exrl(srclen, dstMover); 2593 2594 // Write back new encrypted counter 2595 __ add2reg(src, AES_dataBlk_offset, parmBlk); 2596 __ clear_mem(Address(src, RegisterOrConstant((intptr_t)0)), AES_ctrVal_len); 2597 __ load_const_optimized(srclen, AES_ctrVal_len); // kmctr processes only complete blocks 2598 __ z_lg(encCtr, arg6_Offset, Z_SP); // write encrypted counter to arg6 2599 __ z_lg(counter, counter_offset, parmBlk); // restore counter 2600 __ kmctr(encCtr, counter, src); 2601 2602 // The last used element of the counter vector contains the latest counter value that was used. 2603 // As described above, the counter value on exit must be the one to be used next. 2604 __ bind(allDone); 2605 __ z_lg(counter, counter_offset, parmBlk); // restore counter 2606 generate_increment128(counter, 0, 1, Z_R0); 2607 2608 __ bind(allDone_noInc); 2609 __ z_mvc(0, AES_ctrVal_len, ctr, 0, counter); 2610 2611 __ bind(popAndExit); 2612 generate_counterMode_pop_parmBlk(parmBlk, msglen, dataEraser); 2613 2614 __ bind(Exit); 2615 __ z_lgfr(Z_RET, msglen); 2616 2617 __ z_br(Z_R14); 2618 2619 //---------------------------- 2620 //---< out-of-line code >--- 2621 //---------------------------- 2622 __ bind(CryptoLoop_setupAndDoLast); 2623 __ z_lgf(srclen, rem_msgblk_offset, parmBlk); // remaining #blocks in memory is < 0 2624 __ z_aghi(srclen, AES_ctrVec_len); // recalculate the actually remaining #blocks 2625 __ z_sllg(srclen, srclen, exact_log2(AES_ctrVal_len)); // convert to #bytes. Counter value is same length as data block 2626 __ kmctr(dst, counter, src); // Cipher the last integral blocks of the message. 2627 __ z_bru(CryptoLoop_end); // There is at least one unused counter vector element. 2628 // no need to increment. 2629 2630 __ bind(CryptoLoop_ctrVal_inc); 2631 generate_counterMode_increment_ctrVector(parmBlk, counter, srclen, true); // srclen unused here (serves as scratch) 2632 __ z_bru(CryptoLoop_end); 2633 2634 //------------------------------------------- 2635 //---< execution templates for preLoop >--- 2636 //------------------------------------------- 2637 __ bind(fromMover); 2638 __ z_mvc(0, 0, to, 0, from); // Template instruction to move input data to dst. 2639 __ bind(ctrXOR); 2640 __ z_xc(0, 0, to, 0, encCtr); // Template instruction to XOR input data (now in to) with encrypted counter. 2641 2642 //------------------------------- 2643 //---< execution templates >--- 2644 //------------------------------- 2645 __ bind(dataEraser); 2646 __ z_xc(0, 0, parmBlk, 0, parmBlk); // Template instruction to erase crypto key on stack. 2647 __ bind(dstMover); 2648 __ z_mvc(0, 0, dst, 0, tmpDst); // Template instruction to move encrypted reminder from stack to dst. 
2649 __ bind(srcMover); 2650 __ z_mvc(AES_ctrVal_len, 0, tmpDst, 0, src); // Template instruction to move reminder of source byte stream to stack. 2651 } 2652 2653 2654 // Create two intrinsic variants, optimized for short and long plaintexts. 2655 void generate_counterMode_AES(bool is_decipher) { 2656 2657 const Register msglen = Z_ARG5; // int, Total length of the msg to be encrypted. Value must be 2658 // returned in Z_RET upon completion of this stub. 2659 const int threshold = 256; // above this length (in bytes), text is considered long. 2660 const int vec_short = threshold>>6; // that many blocks (16 bytes each) per iteration, max 4 loop iterations 2661 const int vec_long = threshold>>2; // that many blocks (16 bytes each) per iteration. 2662 2663 Label AESCTR_short, AESCTR_long; 2664 2665 __ z_chi(msglen, threshold); 2666 __ z_brh(AESCTR_long); 2667 2668 __ bind(AESCTR_short); 2669 2670 BLOCK_COMMENT(err_msg("counterMode_AESCrypt (text len <= %d, block size = %d) {", threshold, vec_short*16)); 2671 2672 AES_ctrVec_len = vec_short; 2673 generate_counterMode_AES_impl(false); // control of generated code will not return 2674 2675 BLOCK_COMMENT(err_msg("} counterMode_AESCrypt (text len <= %d, block size = %d)", threshold, vec_short*16)); 2676 2677 __ align(32); // Octoword alignment benefits branch targets. 2678 2679 BLOCK_COMMENT(err_msg("counterMode_AESCrypt (text len > %d, block size = %d) {", threshold, vec_long*16)); 2680 2681 __ bind(AESCTR_long); 2682 AES_ctrVec_len = vec_long; 2683 generate_counterMode_AES_impl(false); // control of generated code will not return 2684 2685 BLOCK_COMMENT(err_msg("} counterMode_AESCrypt (text len > %d, block size = %d)", threshold, vec_long*16)); 2686 } 2687 2688 2689 // Compute AES-CTR crypto function. 2690 // Encrypt or decrypt is selected via parameters. Only one stub is necessary. 2691 address generate_counterMode_AESCrypt() { 2692 __ align(CodeEntryAlignment); 2693 StubGenStubId stub_id = StubGenStubId::counterMode_AESCrypt_id; 2694 StubCodeMark mark(this, stub_id); 2695 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 2696 2697 generate_counterMode_AES(false); 2698 2699 return __ addr_at(start_off); 2700 } 2701 2702 // ***************************************************************************** 2703 2704 // Compute GHASH function. 2705 address generate_ghash_processBlocks() { 2706 __ align(CodeEntryAlignment); 2707 StubGenStubId stub_id = StubGenStubId::ghash_processBlocks_id; 2708 StubCodeMark mark(this, stub_id); 2709 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 2710 2711 const Register state = Z_ARG1; 2712 const Register subkeyH = Z_ARG2; 2713 const Register data = Z_ARG3; // 1st of even-odd register pair. 2714 const Register blocks = Z_ARG4; 2715 const Register len = blocks; // 2nd of even-odd register pair. 2716 2717 const int param_block_size = 4 * 8; 2718 const int frame_resize = param_block_size + 8; // Extra space for copy of fp. 2719 2720 // Reserve stack space for parameter block (R1). 2721 __ z_lgr(Z_R1, Z_SP); 2722 __ resize_frame(-frame_resize, Z_R0, true); 2723 __ z_aghi(Z_R1, -param_block_size); 2724 2725 // Fill parameter block. 2726 __ z_mvc(Address(Z_R1) , Address(state) , 16); 2727 __ z_mvc(Address(Z_R1, 16), Address(subkeyH), 16); 2728 2729 // R4+5: data pointer + length 2730 __ z_llgfr(len, blocks); // Cast to 64-bit. 2731 2732 // R0: function code 2733 __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_GHASH); 2734 2735 // Compute. 
2736 __ z_sllg(len, len, 4); // In bytes. 2737 __ kimd(data); 2738 2739 // Copy back result and free parameter block. 2740 __ z_mvc(Address(state), Address(Z_R1), 16); 2741 __ z_xc(Address(Z_R1), param_block_size, Address(Z_R1)); 2742 __ z_aghi(Z_SP, frame_resize); 2743 2744 __ z_br(Z_R14); 2745 2746 return __ addr_at(start_off); 2747 } 2748 2749 2750 // Call interface for all SHA* stubs. 2751 // 2752 // Z_ARG1 - source data block. Ptr to leftmost byte to be processed. 2753 // Z_ARG2 - current SHA state. Ptr to state area. This area serves as 2754 // parameter block as required by the crypto instruction. 2755 // Z_ARG3 - current byte offset in source data block. 2756 // Z_ARG4 - last byte offset in source data block. 2757 // (Z_ARG4 - Z_ARG3) gives the #bytes remaining to be processed. 2758 // 2759 // Z_RET - return value. First unprocessed byte offset in src buffer. 2760 // 2761 // A few notes on the call interface: 2762 // - All stubs, whether they are single-block or multi-block, are assumed to 2763 // digest an integer multiple of the data block length of data. All data 2764 // blocks are digested using the intermediate message digest (KIMD) instruction. 2765 // Special end processing, as done by the KLMD instruction, seems to be 2766 // emulated by the calling code. 2767 // 2768 // - Z_ARG1 addresses the first byte of source data. The offset (Z_ARG3) is 2769 // already accounted for. 2770 // 2771 // - The current SHA state (the intermediate message digest value) is contained 2772 // in an area addressed by Z_ARG2. The area size depends on the SHA variant 2773 // and is accessible via the enum VM_Version::MsgDigest::_SHA<n>_parmBlk_I 2774 // 2775 // - The single-block stub is expected to digest exactly one data block, starting 2776 // at the address passed in Z_ARG1. 2777 // 2778 // - The multi-block stub is expected to digest all data blocks which start in 2779 // the offset interval [srcOff(Z_ARG3), srcLimit(Z_ARG4)). The exact difference 2780 // (srcLimit-srcOff), rounded up to the next multiple of the data block length, 2781 // gives the number of blocks to digest. It must be assumed that the calling code 2782 // provides for a large enough source data buffer. 2783 // 2784 // Compute SHA-1 function. 2785 address generate_SHA1_stub(StubGenStubId stub_id) { 2786 bool multiBlock; 2787 switch (stub_id) { 2788 case sha1_implCompress_id: 2789 multiBlock = false; 2790 break; 2791 case sha1_implCompressMB_id: 2792 multiBlock = true; 2793 break; 2794 default: 2795 ShouldNotReachHere(); 2796 } 2797 __ align(CodeEntryAlignment); 2798 StubCodeMark mark(this, stub_id); 2799 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 2800 2801 const Register srcBuff = Z_ARG1; // Points to first block to process (offset already added). 2802 const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter for kimd register pairs. 2803 const Register srcOff = Z_ARG3; // int 2804 const Register srcLimit = Z_ARG4; // Only passed in multiBlock case. int 2805 2806 const Register SHAState_local = Z_R1; 2807 const Register SHAState_save = Z_ARG3; 2808 const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before. 2809 Label useKLMD, rtn; 2810 2811 __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA1); // function code 2812 __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block 2813 2814 if (multiBlock) { // Process everything from offset to limit. 
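      // What the multiBlock path below computes, as an illustrative C sketch
      // (dataBlk stands for the per-variant block length, here _SHA1_dataBlk):
      //
      //   int len = srcLimit - srcOff;                    // #bytes handed in by the caller
      //   len     = (len + dataBlk - 1) & ~(dataBlk - 1); // round up to full blocks
      //   ret     = srcOff + len;                         // offset of first unprocessed byte (Z_RET)
      //   // KIMD then digests len/dataBlk full blocks starting at srcBuff.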
2815 2816 // The following description is valid if we get a raw (unpimped) source data buffer, 2817 // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above, 2818 // the calling convention for these stubs is different. We leave the description in 2819 // to inform the reader what must be happening hidden in the calling code. 2820 // 2821 // The data block to be processed can have arbitrary length, i.e. its length does not 2822 // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement 2823 // two different paths. If the length is an integer multiple, we use KIMD, saving us 2824 // to copy the SHA state back and forth. If the length is odd, we copy the SHA state 2825 // to the stack, execute a KLMD instruction on it and copy the result back to the 2826 // caller's SHA state location. 2827 2828 // Total #srcBuff blocks to process. 2829 if (VM_Version::has_DistinctOpnds()) { 2830 __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference 2831 __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up 2832 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff); 2833 __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value. 2834 __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. 2835 } else { 2836 __ z_lgfr(srcBufLen, srcLimit); // Exact difference. srcLimit passed as int. 2837 __ z_sgfr(srcBufLen, srcOff); // SrcOff passed as int, now properly cast to long. 2838 __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); // round up 2839 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA1_dataBlk-1)) & 0xffff); 2840 __ z_lgr(srcLimit, srcOff); // SrcLimit temporarily holds return value. 2841 __ z_agr(srcLimit, srcBufLen); 2842 } 2843 2844 // Integral #blocks to digest? 2845 // As a result of the calculations above, srcBufLen MUST be an integer 2846 // multiple of _SHA1_dataBlk, or else we are in big trouble. 2847 // We insert an asm_assert into the KLMD case to guard against that. 2848 __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA1_dataBlk-1); 2849 __ z_brc(Assembler::bcondNotAllZero, useKLMD); 2850 2851 // Process all full blocks. 2852 __ kimd(srcBuff); 2853 2854 __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. 2855 } else { // Process one data block only. 2856 __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA1_dataBlk); // #srcBuff bytes to process 2857 __ kimd(srcBuff); 2858 __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA1_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. No 32 to 64 bit extension needed. 2859 } 2860 2861 __ bind(rtn); 2862 __ z_br(Z_R14); 2863 2864 if (multiBlock) { 2865 __ bind(useKLMD); 2866 2867 #if 1 2868 // Security net: this stub is believed to be called for full-sized data blocks only. 2869 // NOTE: The following code is believed to be correct, but it is not tested. 2870 __ stop_static("SHA1 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); 2871 #endif 2872 } 2873 2874 return __ addr_at(start_off); 2875 } 2876 2877 // Compute SHA-256 function.
2878 address generate_SHA256_stub(StubGenStubId stub_id) { 2879 bool multiBlock; 2880 switch (stub_id) { 2881 case sha256_implCompress_id: 2882 multiBlock = false; 2883 break; 2884 case sha256_implCompressMB_id: 2885 multiBlock = true; 2886 break; 2887 default: 2888 ShouldNotReachHere(); 2889 } 2890 __ align(CodeEntryAlignment); 2891 StubCodeMark mark(this, stub_id); 2892 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 2893 2894 const Register srcBuff = Z_ARG1; 2895 const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter. 2896 const Register SHAState_local = Z_R1; 2897 const Register SHAState_save = Z_ARG3; 2898 const Register srcOff = Z_ARG3; 2899 const Register srcLimit = Z_ARG4; 2900 const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before. 2901 Label useKLMD, rtn; 2902 2903 __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA256); // function code 2904 __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block 2905 2906 if (multiBlock) { // Process everything from offset to limit. 2907 // The following description is valid if we get a raw (unpimped) source data buffer, 2908 // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above, 2909 // the calling convention for these stubs is different. We leave the description in 2910 // to inform the reader what must be happening hidden in the calling code. 2911 // 2912 // The data block to be processed can have arbitrary length, i.e. its length does not 2913 // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement 2914 // two different paths. If the length is an integer multiple, we use KIMD, saving us 2915 // to copy the SHA state back and forth. If the length is odd, we copy the SHA state 2916 // to the stack, execute a KLMD instruction on it and copy the result back to the 2917 // caller's SHA state location. 2918 2919 // Total #srcBuff blocks to process. 2920 if (VM_Version::has_DistinctOpnds()) { 2921 __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference 2922 __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up 2923 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff); 2924 __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value. 2925 __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. 2926 } else { 2927 __ z_lgfr(srcBufLen, srcLimit); // exact difference 2928 __ z_sgfr(srcBufLen, srcOff); 2929 __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); // round up 2930 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA256_dataBlk-1)) & 0xffff); 2931 __ z_lgr(srcLimit, srcOff); // Srclimit temporarily holds return value. 2932 __ z_agr(srcLimit, srcBufLen); 2933 } 2934 2935 // Integral #blocks to digest? 2936 // As a result of the calculations above, srcBufLen MUST be an integer 2937 // multiple of _SHA256_dataBlk, or else we are in big trouble. 2938 // We insert an asm_assert into the KLMD case to guard against that. 2939 __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA256_dataBlk-1); 2940 __ z_brc(Assembler::bcondNotAllZero, useKLMD); 2941 2942 // Process all full blocks. 2943 __ kimd(srcBuff); 2944 2945 __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. 2946 } else { // Process one data block only.
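// Single-block path, in outline (hypothetical kimd_sha256 model of the KIMD call):
//   kimd_sha256(SHAState, srcBuff, _SHA256_dataBlk);   // digest exactly one 64-byte block
//   return srcOff + _SHA256_dataBlk;                   // offset of first unprocessed byte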
2947 __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA256_dataBlk); // #srcBuff bytes to process 2948 __ kimd(srcBuff); 2949 __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA256_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. 2950 } 2951 2952 __ bind(rtn); 2953 __ z_br(Z_R14); 2954 2955 if (multiBlock) { 2956 __ bind(useKLMD); 2957 #if 1 2958 // Security net: this stub is believed to be called for full-sized data blocks only. 2959 // NOTE: 2960 // The following code is believed to be correct, but is is not tested. 2961 __ stop_static("SHA256 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); 2962 #endif 2963 } 2964 2965 return __ addr_at(start_off); 2966 } 2967 2968 // Compute SHA-512 function. 2969 address generate_SHA512_stub(StubGenStubId stub_id) { 2970 bool multiBlock; 2971 switch (stub_id) { 2972 case sha512_implCompress_id: 2973 multiBlock = false; 2974 break; 2975 case sha512_implCompressMB_id: 2976 multiBlock = true; 2977 break; 2978 default: 2979 ShouldNotReachHere(); 2980 } 2981 __ align(CodeEntryAlignment); 2982 StubCodeMark mark(this, stub_id); 2983 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 2984 2985 const Register srcBuff = Z_ARG1; 2986 const Register SHAState = Z_ARG2; // Only on entry. Reused soon thereafter. 2987 const Register SHAState_local = Z_R1; 2988 const Register SHAState_save = Z_ARG3; 2989 const Register srcOff = Z_ARG3; 2990 const Register srcLimit = Z_ARG4; 2991 const Register srcBufLen = Z_ARG2; // Destroys state address, must be copied before. 2992 Label useKLMD, rtn; 2993 2994 __ load_const_optimized(Z_R0, (int)VM_Version::MsgDigest::_SHA512); // function code 2995 __ z_lgr(SHAState_local, SHAState); // SHAState == parameter block 2996 2997 if (multiBlock) { // Process everything from offset to limit. 2998 // The following description is valid if we get a raw (unpimped) source data buffer, 2999 // spanning the range between [srcOff(Z_ARG3), srcLimit(Z_ARG4)). As detailed above, 3000 // the calling convention for these stubs is different. We leave the description in 3001 // to inform the reader what must be happening hidden in the calling code. 3002 // 3003 // The data block to be processed can have arbitrary length, i.e. its length does not 3004 // need to be an integer multiple of SHA<n>_datablk. Therefore, we need to implement 3005 // two different paths. If the length is an integer multiple, we use KIMD, saving us 3006 // to copy the SHA state back and forth. If the length is odd, we copy the SHA state 3007 // to the stack, execute a KLMD instruction on it and copy the result back to the 3008 // caller's SHA state location. 3009 3010 // total #srcBuff blocks to process 3011 if (VM_Version::has_DistinctOpnds()) { 3012 __ z_srk(srcBufLen, srcLimit, srcOff); // exact difference 3013 __ z_ahi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up 3014 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff); 3015 __ z_ark(srcLimit, srcOff, srcBufLen); // Srclimit temporarily holds return value. 3016 __ z_llgfr(srcBufLen, srcBufLen); // Cast to 64-bit. 3017 } else { 3018 __ z_lgfr(srcBufLen, srcLimit); // exact difference 3019 __ z_sgfr(srcBufLen, srcOff); 3020 __ z_aghi(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); // round up 3021 __ z_nill(srcBufLen, (~(VM_Version::MsgDigest::_SHA512_dataBlk-1)) & 0xffff); 3022 __ z_lgr(srcLimit, srcOff); // Srclimit temporarily holds return value. 
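// At this point srcBufLen holds the byte count rounded up to a multiple of
// _SHA512_dataBlk (128 bytes); the add below forms the return value
// srcOff + srcBufLen, i.e. the offset of the first unprocessed byte.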
3023 __ z_agr(srcLimit, srcBufLen); 3024 } 3025 3026 // Integral #blocks to digest? 3027 // As a result of the calculations above, srcBufLen MUST be an integer 3028 // multiple of _SHA512_dataBlk, or else we are in big trouble. 3029 // We insert an asm_assert into the KLMD case to guard against that. 3030 __ z_tmll(srcBufLen, VM_Version::MsgDigest::_SHA512_dataBlk-1); 3031 __ z_brc(Assembler::bcondNotAllZero, useKLMD); 3032 3033 // Process all full blocks. 3034 __ kimd(srcBuff); 3035 3036 __ z_lgr(Z_RET, srcLimit); // Offset of first unprocessed byte in buffer. 3037 } else { // Process one data block only. 3038 __ load_const_optimized(srcBufLen, (int)VM_Version::MsgDigest::_SHA512_dataBlk); // #srcBuff bytes to process 3039 __ kimd(srcBuff); 3040 __ add2reg(Z_RET, (int)VM_Version::MsgDigest::_SHA512_dataBlk, srcOff); // Offset of first unprocessed byte in buffer. 3041 } 3042 3043 __ bind(rtn); 3044 __ z_br(Z_R14); 3045 3046 if (multiBlock) { 3047 __ bind(useKLMD); 3048 #if 1 3049 // Security net: this stub is believed to be called for full-sized data blocks only. 3050 // NOTE: 3051 // The following code is believed to be correct, but it is not tested. 3052 __ stop_static("SHA512 stub can digest full data blocks only. Use -XX:-UseSHA as remedy.", 0); 3053 #endif 3054 } 3055 3056 return __ addr_at(start_off); 3057 } 3058 3059 3060 /** 3061 * Arguments: 3062 * 3063 * Inputs: 3064 * Z_ARG1 - int crc 3065 * Z_ARG2 - byte* buf 3066 * Z_ARG3 - int length (of buffer) 3067 * 3068 * Result: 3069 * Z_RET - int crc result 3070 **/ 3071 // Compute CRC function (generic, for all polynomials). 3072 void generate_CRC_updateBytes(Register table, bool invertCRC) { 3073 3074 // arguments to kernel_crc32: 3075 Register crc = Z_ARG1; // Current checksum, preset by caller or result from previous call, int. 3076 Register data = Z_ARG2; // source byte array 3077 Register dataLen = Z_ARG3; // #bytes to process, int 3078 // Register table = Z_ARG4; // crc table address. Preloaded and passed in by caller. 3079 const Register t0 = Z_R10; // work reg for kernel* emitters 3080 const Register t1 = Z_R11; // work reg for kernel* emitters 3081 const Register t2 = Z_R12; // work reg for kernel* emitters 3082 const Register t3 = Z_R13; // work reg for kernel* emitters 3083 3084 3085 assert_different_registers(crc, data, dataLen, table); 3086 3087 // We pass these values as ints, not as longs as required by C calling convention. 3088 // Crc used as int. 3089 __ z_llgfr(dataLen, dataLen); 3090 3091 __ resize_frame(-(6*8), Z_R0, true); // Resize frame to provide add'l space to spill 4 registers. 3092 __ z_stmg(Z_R10, Z_R13, 1*8, Z_SP); // Spill regs Z_R10..Z_R13 to make them available as work registers. 3093 __ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, invertCRC); 3094 __ z_lmg(Z_R10, Z_R13, 1*8, Z_SP); // Restore regs Z_R10..Z_R13 from stack. 3095 __ resize_frame(+(6*8), Z_R0, true); // Remove the add'l frame space again. 3096 3097 __ z_llgfr(Z_RET, crc); // Updated crc is function result. No copying required, just zero upper 32 bits. 3098 __ z_br(Z_R14); // Result already in Z_RET == Z_ARG1. 3099 } 3100 3101 3102 // Compute CRC32 function. 3103 address generate_CRC32_updateBytes() { 3104 __ align(CodeEntryAlignment); 3105 StubGenStubId stub_id = StubGenStubId::updateBytesCRC32_id; 3106 StubCodeMark mark(this, stub_id); 3107 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value).
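// For reference: with invertCRC == true (as used for java.util.zip.CRC32), the code
// emitted via generate_CRC_updateBytes() above is equivalent to the classic byte-wise,
// table-driven CRC-32 below (reflected polynomial 0xEDB88320). Sketch only: the
// generated kernel consumes whole words and uses a wider multi-table layout rather
// than a single 256-entry table.
//
//   uint32_t crc32_ref(uint32_t crc, const uint8_t* buf, int len, const uint32_t tbl[256]) {
//     crc = ~crc;                                        // invertCRC: pre-invert
//     for (int i = 0; i < len; i++) {
//       crc = (crc >> 8) ^ tbl[(crc ^ buf[i]) & 0xff];   // one byte per step
//     }
//     return ~crc;                                       // invertCRC: post-invert
//   }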
3108 3109 assert(UseCRC32Intrinsics, "should not generate this stub (%s) with CRC32 intrinsics disabled", StubRoutines::get_stub_name(stub_id)); 3110 3111 BLOCK_COMMENT("CRC32_updateBytes {"); 3112 Register table = Z_ARG4; // crc32 table address. 3113 StubRoutines::zarch::generate_load_crc_table_addr(_masm, table); 3114 3115 generate_CRC_updateBytes(table, true); 3116 BLOCK_COMMENT("} CRC32_updateBytes"); 3117 3118 return __ addr_at(start_off); 3119 } 3120 3121 3122 // Compute CRC32C function. 3123 address generate_CRC32C_updateBytes() { 3124 __ align(CodeEntryAlignment); 3125 StubGenStubId stub_id = StubGenStubId::updateBytesCRC32C_id; 3126 StubCodeMark mark(this, stub_id); 3127 unsigned int start_off = __ offset(); // Remember stub start address (is rtn value). 3128 3129 assert(UseCRC32CIntrinsics, "should not generate this stub (%s) with CRC32C intrinsics disabled", StubRoutines::get_stub_name(stub_id)); 3130 3131 BLOCK_COMMENT("CRC32C_updateBytes {"); 3132 Register table = Z_ARG4; // crc32c table address. 3133 StubRoutines::zarch::generate_load_crc32c_table_addr(_masm, table); 3134 3135 generate_CRC_updateBytes(table, false); 3136 BLOCK_COMMENT("} CRC32C_updateBytes"); 3137 3138 return __ addr_at(start_off); 3139 } 3140 3141 3142 // Arguments: 3143 // Z_ARG1 - x address 3144 // Z_ARG2 - x length 3145 // Z_ARG3 - y address 3146 // Z_ARG4 - y length 3147 // Z_ARG5 - z address 3148 address generate_multiplyToLen() { 3149 __ align(CodeEntryAlignment); 3150 StubGenStubId stub_id = StubGenStubId::multiplyToLen_id; 3151 StubCodeMark mark(this, stub_id); 3152 3153 address start = __ pc(); 3154 3155 const Register x = Z_ARG1; 3156 const Register xlen = Z_ARG2; 3157 const Register y = Z_ARG3; 3158 const Register ylen = Z_ARG4; 3159 const Register z = Z_ARG5; 3160 3161 // Next registers will be saved on stack in multiply_to_len(). 3162 const Register tmp1 = Z_tmp_1; 3163 const Register tmp2 = Z_tmp_2; 3164 const Register tmp3 = Z_tmp_3; 3165 const Register tmp4 = Z_tmp_4; 3166 const Register tmp5 = Z_R9; 3167 3168 BLOCK_COMMENT("Entry:"); 3169 3170 __ z_llgfr(xlen, xlen); 3171 __ z_llgfr(ylen, ylen); 3172 3173 __ multiply_to_len(x, xlen, y, ylen, z, tmp1, tmp2, tmp3, tmp4, tmp5); 3174 3175 __ z_br(Z_R14); // Return to caller. 3176 3177 return start; 3178 } 3179 3180 address generate_method_entry_barrier() { 3181 __ align(CodeEntryAlignment); 3182 StubGenStubId stub_id = StubGenStubId::method_entry_barrier_id; 3183 StubCodeMark mark(this, stub_id); 3184 3185 address start = __ pc(); 3186 3187 int nbytes_volatile = (8 + 5) * BytesPerWord; 3188 3189 // VM-Call Prologue 3190 __ save_return_pc(); 3191 __ push_frame_abi160(nbytes_volatile); 3192 __ save_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 3193 3194 // Prep arg for VM call 3195 // Create ptr to stored return_pc in caller frame. 3196 __ z_la(Z_ARG1, _z_abi(return_pc) + frame::z_abi_160_size + nbytes_volatile, Z_R0, Z_SP); 3197 3198 // VM-Call: BarrierSetNMethod::nmethod_stub_entry_barrier(address* return_address_ptr) 3199 __ call_VM_leaf(CAST_FROM_FN_PTR(address, BarrierSetNMethod::nmethod_stub_entry_barrier)); 3200 __ z_ltr(Z_R0_scratch, Z_RET); 3201 3202 // VM-Call Epilogue 3203 __ restore_volatile_regs(Z_SP, frame::z_abi_160_size, true, false); 3204 __ pop_frame(); 3205 __ restore_return_pc(); 3206 3207 // Check return val of VM-Call 3208 __ z_bcr(Assembler::bcondZero, Z_R14); 3209 3210 // Pop frame built in prologue. 3211 // Required so wrong_method_stub can deduce caller. 
3212 __ pop_frame(); 3213 __ restore_return_pc(); 3214 3215 // VM-Call indicates deoptimization required 3216 __ load_const_optimized(Z_R1_scratch, SharedRuntime::get_handle_wrong_method_stub()); 3217 __ z_br(Z_R1_scratch); 3218 3219 return start; 3220 } 3221 3222 address generate_cont_thaw(bool return_barrier, bool exception) { 3223 if (!Continuations::enabled()) return nullptr; 3224 Unimplemented(); 3225 return nullptr; 3226 } 3227 3228 address generate_cont_thaw() { 3229 if (!Continuations::enabled()) return nullptr; 3230 Unimplemented(); 3231 return nullptr; 3232 } 3233 3234 address generate_cont_returnBarrier() { 3235 if (!Continuations::enabled()) return nullptr; 3236 Unimplemented(); 3237 return nullptr; 3238 } 3239 3240 address generate_cont_returnBarrier_exception() { 3241 if (!Continuations::enabled()) return nullptr; 3242 Unimplemented(); 3243 return nullptr; 3244 } 3245 3246 // exception handler for upcall stubs 3247 address generate_upcall_stub_exception_handler() { 3248 StubGenStubId stub_id = StubGenStubId::upcall_stub_exception_handler_id; 3249 StubCodeMark mark(this, stub_id); 3250 address start = __ pc(); 3251 3252 // Native caller has no idea how to handle exceptions, 3253 // so we just crash here. Up to callee to catch exceptions. 3254 __ verify_oop(Z_ARG1); 3255 __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(uint64_t, UpcallLinker::handle_uncaught_exception)); 3256 __ call_c(Z_R1_scratch); 3257 __ should_not_reach_here(); 3258 3259 return start; 3260 } 3261 3262 // load Method* target of MethodHandle 3263 // Z_ARG1 = jobject receiver 3264 // Z_method = Method* result 3265 address generate_upcall_stub_load_target() { 3266 StubGenStubId stub_id = StubGenStubId::upcall_stub_load_target_id; 3267 StubCodeMark mark(this, stub_id); 3268 address start = __ pc(); 3269 3270 __ resolve_global_jobject(Z_ARG1, Z_tmp_1, Z_tmp_2); 3271 // Load target method from receiver 3272 __ load_heap_oop(Z_method, Address(Z_ARG1, java_lang_invoke_MethodHandle::form_offset()), 3273 noreg, noreg, IS_NOT_NULL); 3274 __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_LambdaForm::vmentry_offset()), 3275 noreg, noreg, IS_NOT_NULL); 3276 __ load_heap_oop(Z_method, Address(Z_method, java_lang_invoke_MemberName::method_offset()), 3277 noreg, noreg, IS_NOT_NULL); 3278 __ z_lg(Z_method, Address(Z_method, java_lang_invoke_ResolvedMethodName::vmtarget_offset())); 3279 __ z_stg(Z_method, Address(Z_thread, JavaThread::callee_target_offset())); // just in case callee is deoptimized 3280 3281 __ z_br(Z_R14); 3282 3283 return start; 3284 } 3285 3286 void generate_initial_stubs() { 3287 // Generates all stubs and initializes the entry points. 3288 3289 // Entry points that exist in all platforms. 3290 // Note: This is code that could be shared among different 3291 // platforms - however the benefit seems to be smaller than the 3292 // disadvantage of having a much more complicated generator 3293 // structure. See also comment in stubRoutines.hpp. 3294 StubRoutines::_forward_exception_entry = generate_forward_exception(); 3295 3296 StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); 3297 StubRoutines::_catch_exception_entry = generate_catch_exception(); 3298 3299 //---------------------------------------------------------------------- 3300 // Entry points that are platform specific. 
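// UnsafeMemoryAccess::create_table() reserves entries that map instruction ranges of
// Unsafe copy/set stubs to their error handlers, so that a fault raised inside such a
// stub can be turned into a Java exception instead of crashing the VM.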
3301 3302 if (UnsafeMemoryAccess::_table == nullptr) { 3303 UnsafeMemoryAccess::create_table(4); // 4 for setMemory 3304 } 3305 3306 if (UseCRC32Intrinsics) { 3307 StubRoutines::_crc_table_adr = (address)StubRoutines::zarch::_crc_table; 3308 StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes(); 3309 } 3310 3311 if (UseCRC32CIntrinsics) { 3312 StubRoutines::_crc32c_table_addr = (address)StubRoutines::zarch::_crc32c_table; 3313 StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes(); 3314 } 3315 3316 // Compact string intrinsics: Translate table for string inflate intrinsic. Used by trot instruction. 3317 StubRoutines::zarch::_trot_table_addr = (address)StubRoutines::zarch::_trot_table; 3318 } 3319 3320 void generate_continuation_stubs() { 3321 if (!Continuations::enabled()) return; 3322 3323 // Continuation stubs: 3324 StubRoutines::_cont_thaw = generate_cont_thaw(); 3325 StubRoutines::_cont_returnBarrier = generate_cont_returnBarrier(); 3326 StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception(); 3327 } 3328 3329 void generate_final_stubs() { 3330 // Generates all stubs and initializes the entry points. 3331 3332 // Support for verify_oop (must happen after universe_init). 3333 StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop_subroutine(); 3334 3335 // Arraycopy stubs used by compilers. 3336 generate_arraycopy_stubs(); 3337 3338 // nmethod entry barriers for concurrent class unloading 3339 StubRoutines::_method_entry_barrier = generate_method_entry_barrier(); 3340 3341 #ifdef COMPILER2 3342 if (UseSecondarySupersTable) { 3343 StubRoutines::_lookup_secondary_supers_table_slow_path_stub = generate_lookup_secondary_supers_table_slow_path_stub(); 3344 if (!InlineSecondarySupersTest) { 3345 generate_lookup_secondary_supers_table_stub(); 3346 } 3347 } 3348 #endif // COMPILER2 3349 3350 StubRoutines::_upcall_stub_exception_handler = generate_upcall_stub_exception_handler(); 3351 StubRoutines::_upcall_stub_load_target = generate_upcall_stub_load_target(); 3352 } 3353 3354 void generate_compiler_stubs() { 3355 3356 StubRoutines::zarch::_partial_subtype_check = generate_partial_subtype_check(); 3357 3358 #if COMPILER2_OR_JVMCI 3359 // Generate AES intrinsics code. 3360 if (UseAESIntrinsics) { 3361 if (VM_Version::has_Crypto_AES()) { 3362 StubRoutines::_aescrypt_encryptBlock = generate_AES_encryptBlock(); 3363 StubRoutines::_aescrypt_decryptBlock = generate_AES_decryptBlock(); 3364 StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_AES_encrypt(); 3365 StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_AES_decrypt(); 3366 } else { 3367 // In PRODUCT builds, the function pointers will keep their initial (null) value. 3368 // LibraryCallKit::try_to_inline() will then return false, preventing the intrinsic from being called. 3369 assert(VM_Version::has_Crypto_AES(), "Inconsistent settings. Check vm_version_s390.cpp"); 3370 } 3371 } 3372 3373 if (UseAESCTRIntrinsics) { 3374 if (VM_Version::has_Crypto_AES_CTR()) { 3375 StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt(); 3376 } else { 3377 // In PRODUCT builds, the function pointers will keep their initial (null) value. 3378 // LibraryCallKit::try_to_inline() will then return false, preventing the intrinsic from being called. 3379 assert(VM_Version::has_Crypto_AES_CTR(), "Inconsistent settings.
Check vm_version_s390.cpp"); 3380 } 3381 } 3382 3383 // Generate GHASH intrinsics code 3384 if (UseGHASHIntrinsics) { 3385 StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks(); 3386 } 3387 3388 // Generate SHA1/SHA256/SHA512 intrinsics code. 3389 if (UseSHA1Intrinsics) { 3390 StubRoutines::_sha1_implCompress = generate_SHA1_stub(StubGenStubId::sha1_implCompress_id); 3391 StubRoutines::_sha1_implCompressMB = generate_SHA1_stub(StubGenStubId::sha1_implCompressMB_id); 3392 } 3393 if (UseSHA256Intrinsics) { 3394 StubRoutines::_sha256_implCompress = generate_SHA256_stub(StubGenStubId::sha256_implCompress_id); 3395 StubRoutines::_sha256_implCompressMB = generate_SHA256_stub(StubGenStubId::sha256_implCompressMB_id); 3396 } 3397 if (UseSHA512Intrinsics) { 3398 StubRoutines::_sha512_implCompress = generate_SHA512_stub(StubGenStubId::sha512_implCompress_id); 3399 StubRoutines::_sha512_implCompressMB = generate_SHA512_stub(StubGenStubId::sha512_implCompressMB_id); 3400 } 3401 3402 #ifdef COMPILER2 3403 if (UseMultiplyToLenIntrinsic) { 3404 StubRoutines::_multiplyToLen = generate_multiplyToLen(); 3405 } 3406 if (UseMontgomeryMultiplyIntrinsic) { 3407 StubRoutines::_montgomeryMultiply 3408 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); 3409 } 3410 if (UseMontgomerySquareIntrinsic) { 3411 StubRoutines::_montgomerySquare 3412 = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); 3413 } 3414 #endif 3415 #endif // COMPILER2_OR_JVMCI 3416 } 3417 3418 public: 3419 StubGenerator(CodeBuffer* code, StubGenBlobId blob_id) : StubCodeGenerator(code, blob_id) { 3420 switch(blob_id) { 3421 case initial_id: 3422 generate_initial_stubs(); 3423 break; 3424 case continuation_id: 3425 generate_continuation_stubs(); 3426 break; 3427 case compiler_id: 3428 generate_compiler_stubs(); 3429 break; 3430 case final_id: 3431 generate_final_stubs(); 3432 break; 3433 default: 3434 fatal("unexpected blob id: %d", blob_id); 3435 break; 3436 }; 3437 } 3438 3439 private: 3440 int _stub_count; 3441 void stub_prolog(StubCodeDesc* cdesc) { 3442 #ifdef ASSERT 3443 // Put extra information in the stub code, to make it more readable. 3444 // Write the high part of the address. 3445 // [RGV] Check if there is a dependency on the size of this prolog. 3446 __ emit_data((intptr_t)cdesc >> 32); 3447 __ emit_data((intptr_t)cdesc); 3448 __ emit_data(++_stub_count); 3449 #endif 3450 align(true); 3451 } 3452 3453 void align(bool at_header = false) { 3454 // z/Architecture cache line size is 256 bytes. 3455 // There is no obvious benefit in aligning stub 3456 // code to cache lines. Use CodeEntryAlignment instead. 3457 const unsigned int icache_line_size = CodeEntryAlignment; 3458 const unsigned int icache_half_line_size = MIN2<unsigned int>(32, CodeEntryAlignment); 3459 3460 if (at_header) { 3461 while ((intptr_t)(__ pc()) % icache_line_size != 0) { 3462 __ z_illtrap(); 3463 } 3464 } else { 3465 while ((intptr_t)(__ pc()) % icache_half_line_size != 0) { 3466 __ z_nop(); 3467 } 3468 } 3469 } 3470 3471 }; 3472 3473 void StubGenerator_generate(CodeBuffer* code, StubGenBlobId blob_id) { 3474 StubGenerator g(code, blob_id); 3475 }