28 #include "opto/c2_MacroAssembler.hpp"
29 #include "opto/compile.hpp"
30 #include "opto/intrinsicnode.hpp"
31 #include "opto/output.hpp"
32 #include "opto/subnode.hpp"
33 #include "runtime/objectMonitorTable.hpp"
34 #include "runtime/stubRoutines.hpp"
35 #include "runtime/synchronizer.hpp"
36 #include "utilities/globalDefinitions.hpp"
37
// Assembler annotation helpers.
// In PRODUCT builds BLOCK_COMMENT expands to nothing and STOP only calls stop();
// in other builds BLOCK_COMMENT forwards to block_comment() and STOP records the
// message with block_comment() before calling stop().
38 #ifdef PRODUCT
39 #define BLOCK_COMMENT(str) /* nothing */
40 #define STOP(error) stop(error)
41 #else
42 #define BLOCK_COMMENT(str) block_comment(str)
// NOTE: expands to two statements, so it must not be used as the sole body of
// an unbraced if/else/loop.
43 #define STOP(error) block_comment(error); stop(error)
44 #endif
45
// Binds `label` and then emits a block comment made of the stringized label
// name followed by ":". Also expands to two statements (see the note on STOP).
46 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
47
48 void C2_MacroAssembler::fast_lock(Register obj, Register box,
49 Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
50 // Flag register, zero for success; non-zero for failure.
51 Register flag = t1;
52
53 assert_different_registers(obj, box, tmp1, tmp2, tmp3, tmp4, flag, t0);
54
55 mv(flag, 1);
56
57 // Handle inflated monitor.
58 Label inflated;
59 // Finish fast lock successfully. MUST branch to with flag == 0
60 Label locked;
61 // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
62 Label slow_path;
63
64 if (UseObjectMonitorTable) {
65 // Clear cache in case fast locking succeeds or we need to take the slow-path.
66 sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
67 }
2335
2336 __ j(stub.continuation());
2337 #undef __
2338 }
2339
2340 // j.l.Float.float16ToFloat
// Emits the inline fast path that converts the half-precision bit pattern held
// in the integer register `src` into a single-precision value in `dst`.
// Inputs whose exponent bits (mask 0x7c00) are all set, i.e. NaN and +/- Inf,
// branch to an out-of-line stub built around float16_to_float_slow_path; all
// other inputs are converted with fmv_h_x followed by fcvt_s_h.
//   dst - receives the converted value
//   src - integer register holding the 16-bit input
//   tmp - scratch register, overwritten with the masked exponent bits
// Also overwrites t0.
2341 void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
// NOTE(review): 20 is presumably the size reserved for the stub code - confirm
// against C2CodeStub::make.
2342 auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);
2343
2344 // On riscv, NaN needs a special process as fcvt does not work in that case.
2345 // On riscv, Inf does not need a special process as fcvt can handle it correctly.
2346 // but we consider to get the slow path to process NaN and Inf at the same time,
2347 // as both of them are rare cases, and if we try to get the slow path to handle
2348 // only NaN case it would sacrifice the performance for normal cases,
2349 // i.e. non-NaN and non-Inf cases.
2350
2351 // check whether it's a NaN or +/- Inf.
// t0 = exponent mask; tmp = src & mask. tmp == t0 means every exponent bit is set.
2352 mv(t0, 0x7c00);
2353 andr(tmp, src, t0);
2354 // jump to stub processing NaN and Inf cases.
2355 beq(t0, tmp, stub->entry(), /* is_far */ true);
2356
2357 // non-NaN or non-Inf cases, just use built-in instructions.
// Move the raw bits into dst, then widen in place.
2358 fmv_h_x(dst, src);
2359 fcvt_s_h(dst, dst);
2360
// The stub is expected to jump back here once it has produced dst.
2361 bind(stub->continuation());
2362 }
2363
// Out-of-line slow path used by C2_MacroAssembler::float_to_float16.
// Unpacks (dst, src, tmp) from the stub, binds the stub entry, delegates the
// conversion to float_to_float16_NaN with t0 and tmp as scratch registers, and
// jumps back to the stub continuation.
2364 static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Register, FloatRegister, Register>& stub) {
// `__` is a local shorthand for emitting through masm; it is undefined again
// at the end of this function.
2365 #define __ masm.
2366 Register dst = stub.data<0>();
2367 FloatRegister src = stub.data<1>();
2368 Register tmp = stub.data<2>();
2369 __ bind(stub.entry());
2370
2371 __ float_to_float16_NaN(dst, src, t0, tmp);
2372
2373 __ j(stub.continuation());
2374 #undef __
2375 }
2376
2377 // j.l.Float.floatToFloat16
// Emits the inline fast path that converts the single-precision value in `src`
// into a half-precision bit pattern in the integer register `dst`.
// NaN inputs branch to an out-of-line stub built around
// float_to_float16_slow_path; all other inputs are converted with fcvt_h_s
// followed by fmv_x_h.
//   dst  - integer register receiving the 16-bit result
//   src  - value to convert
//   ftmp - floating-point scratch register holding the narrowed value
//   xtmp - integer scratch register handed to the stub
// Also overwrites t0.
2378 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
// NOTE(review): 64 is presumably the size reserved for the stub code - confirm
// against C2CodeStub::make.
2379 auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
2380
2381 // On riscv, NaN needs a special process as fcvt does not work in that case.
2382
2383 // check whether it's a NaN.
2384 // replace fclass with feq as performance optimization.
// Comparing src with itself yields 0 in t0 only when src is a NaN.
2385 feq_s(t0, src, src);
2386 // jump to stub processing NaN cases.
2387 beqz(t0, stub->entry(), /* is_far */ true);
2388
2389 // non-NaN cases, just use built-in instructions.
2390 fcvt_h_s(ftmp, src);
2391 fmv_x_h(dst, ftmp);
2392
// The stub jumps back here after handling the NaN case.
2393 bind(stub->continuation());
2394 }
2395
2396 static void float16_to_float_v_slow_path(C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint>& stub) {
2397 #define __ masm.
2398 VectorRegister dst = stub.data<0>();
2399 VectorRegister src = stub.data<1>();
2400 uint vector_length = stub.data<2>();
2401 __ bind(stub.entry());
2402
2403 // following instructions mainly focus on NaN, as riscv does not handle
2404 // NaN well with vfwcvt_f_f_v, but the code also works for Inf at the same time.
2405 //
2406 // construct NaN's in 32 bits from the NaN's in 16 bits,
2407 // we need the payloads of non-canonical NaNs to be preserved.
2428 // On riscv, NaN needs a special process as vfwcvt_f_f_v does not work in that case.
2429 // On riscv, Inf does not need a special process as vfwcvt_f_f_v can handle it correctly.
2430 // but we consider to get the slow path to process NaN and Inf at the same time,
2431 // as both of them are rare cases, and if we try to get the slow path to handle
2432 // only NaN case it would sacrifice the performance for normal cases,
2433 // i.e. non-NaN and non-Inf cases.
2434
2435 vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2);
2436
2437 // check whether there is a NaN or +/- Inf.
2438 mv(t0, 0x7c00);
2439 vand_vx(v0, src, t0);
2440 // v0 will be used as mask in slow path.
2441 vmseq_vx(v0, v0, t0);
2442 vcpop_m(t0, v0);
2443
2444 // For non-NaN or non-Inf cases, just use built-in instructions.
2445 vfwcvt_f_f_v(dst, src);
2446
2447 // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide.
2448 bnez(t0, stub->entry(), true);
2449
2450 bind(stub->continuation());
2451 }
2452
2453 static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
2454 C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
2455 #define __ masm.
2456 VectorRegister dst = stub.data<0>();
2457 VectorRegister src = stub.data<1>();
2458 VectorRegister vtmp = stub.data<2>();
2459 assert_different_registers(dst, src, vtmp);
2460
2461 __ bind(stub.entry());
2462
2463 // Active elements (NaNs) are marked in v0 mask register.
2464 // mul is already set to mf2 in float_to_float16_v.
2465
2466 // Float (32 bits)
2467 // Bit: 31 30 to 23 22 to 0
2468 // +---+------------------+-----------------------------+
2521 assert_different_registers(dst, src, vtmp);
2522
2523 auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2524 (dst, src, vtmp, 56, float_to_float16_v_slow_path);
2525
2526 // On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
2527
2528 vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
2529
2530 // check whether there is a NaN.
2531 // replace v_fclass with vmfne_vv as performance optimization.
2532 vmfne_vv(v0, src, src);
2533 vcpop_m(t0, v0);
2534
2535 vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2, tmp);
2536
2537 // For non-NaN cases, just use built-in instructions.
2538 vfncvt_f_f_w(dst, src);
2539
2540 // jump to stub processing NaN cases.
2541 bnez(t0, stub->entry(), true);
2542
2543 bind(stub->continuation());
2544 }
2545
// Vector signum helper: rewrites `dst` in place so that every element that is
// neither a zero nor a NaN takes the value of the matching element of `one`
// with the element's own sign.
//   dst  - input vector and result vector
//   one  - vector supplying the magnitude (floating-point 1.0 per the comment below)
//   bt   - element type forwarded to vsetvli_helper
//   vlen - vector length forwarded to vsetvli_helper
// Overwrites v0 (used as the mask) and t0.
2546 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
2547 vsetvli_helper(bt, vlen);
2548
2549 // check if input is -0, +0, signaling NaN or quiet NaN
2550 vfclass_v(v0, dst);
2551 mv(t0, FClassBits::zero | FClassBits::nan);
2552 vand_vx(v0, v0, t0);
// Mask bit is set where no zero/NaN class bit matched, i.e. for the elements
// that must be replaced.
2553 vmseq_vi(v0, v0, 0);
2554
2555 // use floating-point 1.0 with a sign of input
// NOTE(review): masked-off elements (zeros and NaNs) are expected to keep their
// input value - confirm the mask policy configured by vsetvli_helper.
2556 vfsgnj_vv(dst, one, dst, v0_t);
2557 }
2558
2559 // j.l.Math.round(float)
2560 // Returns the closest int to the argument, with ties rounding to positive infinity.
2561 // We need to handle 3 special cases defined by java api spec:
|
28 #include "opto/c2_MacroAssembler.hpp"
29 #include "opto/compile.hpp"
30 #include "opto/intrinsicnode.hpp"
31 #include "opto/output.hpp"
32 #include "opto/subnode.hpp"
33 #include "runtime/objectMonitorTable.hpp"
34 #include "runtime/stubRoutines.hpp"
35 #include "runtime/synchronizer.hpp"
36 #include "utilities/globalDefinitions.hpp"
37
// Assembler annotation helpers.
// In PRODUCT builds BLOCK_COMMENT expands to nothing and STOP only calls stop();
// in other builds BLOCK_COMMENT forwards to block_comment() and STOP records the
// message with block_comment() before calling stop().
38 #ifdef PRODUCT
39 #define BLOCK_COMMENT(str) /* nothing */
40 #define STOP(error) stop(error)
41 #else
42 #define BLOCK_COMMENT(str) block_comment(str)
// NOTE: expands to two statements, so it must not be used as the sole body of
// an unbraced if/else/loop.
43 #define STOP(error) block_comment(error); stop(error)
44 #endif
45
// Binds `label` and then emits a block comment made of the stringized label
// name followed by ":". Also expands to two statements (see the note on STOP).
46 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
47
// Emits the nmethod entry barrier through the active BarrierSetAssembler.
// While the compiler is only measuring code size (scratch emission), throw-away
// local labels are passed to the barrier. Otherwise a C2EntryBarrierStub is
// allocated in the compilation arena, registered with the output phase, and its
// entry/continuation/guard labels are passed instead.
48 void C2_MacroAssembler::entry_barrier() {
49 BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
50 // Dummy labels for just measuring the code size
51 Label dummy_slow_path;
52 Label dummy_continuation;
53 Label dummy_guard;
// Default to the dummies; redirected to the stub's labels below for a real emission.
54 Label* slow_path = &dummy_slow_path;
55 Label* continuation = &dummy_continuation;
56 Label* guard = &dummy_guard;
57
58 if (!Compile::current()->output()->in_scratch_emit_size()) {
59 // Use real labels from actual stub when not emitting code for the purpose of measuring its size
60 C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
61 Compile::current()->output()->add_stub(stub);
62 slow_path = &stub->entry();
63 continuation = &stub->continuation();
64 guard = &stub->guard();
65 }
66
67 // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
68 bs->nmethod_entry_barrier(this, slow_path, continuation, guard);
69 }
70
71 void C2_MacroAssembler::fast_lock(Register obj, Register box,
72 Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
73 // Flag register, zero for success; non-zero for failure.
74 Register flag = t1;
75
76 assert_different_registers(obj, box, tmp1, tmp2, tmp3, tmp4, flag, t0);
77
78 mv(flag, 1);
79
80 // Handle inflated monitor.
81 Label inflated;
82 // Finish fast lock successfully. MUST branch to with flag == 0
83 Label locked;
84 // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
85 Label slow_path;
86
87 if (UseObjectMonitorTable) {
88 // Clear cache in case fast locking succeeds or we need to take the slow-path.
89 sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
90 }
2358
2359 __ j(stub.continuation());
2360 #undef __
2361 }
2362
2363 // j.l.Float.float16ToFloat
// Emits the inline fast path that converts the half-precision bit pattern held
// in the integer register `src` into a single-precision value in `dst`.
// Inputs whose exponent bits (mask 0x7c00) are all set, i.e. NaN and +/- Inf,
// branch to an out-of-line stub built around float16_to_float_slow_path; all
// other inputs are converted with fmv_h_x followed by fcvt_s_h.
//   dst - receives the converted value
//   src - integer register holding the 16-bit input
//   tmp - scratch register, overwritten with the masked exponent bits
// Also overwrites t0.
2364 void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
// NOTE(review): 20 is presumably the size reserved for the stub code - confirm
// against C2CodeStub::make.
2365 auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);
2366
2367 // On riscv, NaN needs a special process as fcvt does not work in that case.
2368 // On riscv, Inf does not need a special process as fcvt can handle it correctly.
2369 // but we consider to get the slow path to process NaN and Inf at the same time,
2370 // as both of them are rare cases, and if we try to get the slow path to handle
2371 // only NaN case it would sacrifice the performance for normal cases,
2372 // i.e. non-NaN and non-Inf cases.
2373
2374 // check whether it's a NaN or +/- Inf.
// t0 = exponent mask; tmp = src & mask. tmp == t0 means every exponent bit is set.
2375 mv(t0, 0x7c00);
2376 andr(tmp, src, t0);
2377 // jump to stub processing NaN and Inf cases.
2378 beq(t0, tmp, stub->entry(), /* is_far */ true);
2379
2380 // non-NaN or non-Inf cases, just use built-in instructions.
// Move the raw bits into dst, then widen in place.
2381 fmv_h_x(dst, src);
2382 fcvt_s_h(dst, dst);
2383
// The stub is expected to jump back here once it has produced dst.
2384 bind(stub->continuation());
2385 }
2386
// Out-of-line slow path used by C2_MacroAssembler::float_to_float16.
// Unpacks (dst, src, tmp) from the stub, binds the stub entry, delegates the
// conversion to float_to_float16_NaN with t0 and tmp as scratch registers, and
// jumps back to the stub continuation.
2387 static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Register, FloatRegister, Register>& stub) {
// `__` is a local shorthand for emitting through masm; it is undefined again
// at the end of this function.
2388 #define __ masm.
2389 Register dst = stub.data<0>();
2390 FloatRegister src = stub.data<1>();
2391 Register tmp = stub.data<2>();
2392 __ bind(stub.entry());
2393
2394 __ float_to_float16_NaN(dst, src, t0, tmp);
2395
2396 __ j(stub.continuation());
2397 #undef __
2398 }
2399
2400 // j.l.Float.floatToFloat16
// Emits the inline fast path that converts the single-precision value in `src`
// into a half-precision bit pattern in the integer register `dst`.
// NaN inputs branch to an out-of-line stub built around
// float_to_float16_slow_path; all other inputs are converted with fcvt_h_s
// followed by fmv_x_h.
//   dst  - integer register receiving the 16-bit result
//   src  - value to convert
//   ftmp - floating-point scratch register holding the narrowed value
//   xtmp - integer scratch register handed to the stub
// Also overwrites t0.
2401 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
// NOTE(review): 64 is presumably the size reserved for the stub code - confirm
// against C2CodeStub::make.
2402 auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
2403
2404 // On riscv, NaN needs a special process as fcvt does not work in that case.
2405
2406 // check whether it's a NaN.
2407 // replace fclass with feq as performance optimization.
// Comparing src with itself yields 0 in t0 only when src is a NaN.
2408 feq_s(t0, src, src);
2409 // jump to stub processing NaN cases.
2410 beqz(t0, stub->entry(), /* is_far */ true);
2411
2412 // non-NaN cases, just use built-in instructions.
2413 fcvt_h_s(ftmp, src);
2414 fmv_x_h(dst, ftmp);
2415
// The stub jumps back here after handling the NaN case.
2416 bind(stub->continuation());
2417 }
2418
2419 static void float16_to_float_v_slow_path(C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint>& stub) {
2420 #define __ masm.
2421 VectorRegister dst = stub.data<0>();
2422 VectorRegister src = stub.data<1>();
2423 uint vector_length = stub.data<2>();
2424 __ bind(stub.entry());
2425
2426 // following instructions mainly focus on NaN, as riscv does not handle
2427 // NaN well with vfwcvt_f_f_v, but the code also works for Inf at the same time.
2428 //
2429 // construct NaN's in 32 bits from the NaN's in 16 bits,
2430 // we need the payloads of non-canonical NaNs to be preserved.
2451 // On riscv, NaN needs a special process as vfwcvt_f_f_v does not work in that case.
2452 // On riscv, Inf does not need a special process as vfwcvt_f_f_v can handle it correctly.
2453 // but we consider to get the slow path to process NaN and Inf at the same time,
2454 // as both of them are rare cases, and if we try to get the slow path to handle
2455 // only NaN case it would sacrifice the performance for normal cases,
2456 // i.e. non-NaN and non-Inf cases.
2457
2458 vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2);
2459
2460 // check whether there is a NaN or +/- Inf.
2461 mv(t0, 0x7c00);
2462 vand_vx(v0, src, t0);
2463 // v0 will be used as mask in slow path.
2464 vmseq_vx(v0, v0, t0);
2465 vcpop_m(t0, v0);
2466
2467 // For non-NaN or non-Inf cases, just use built-in instructions.
2468 vfwcvt_f_f_v(dst, src);
2469
2470 // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide.
2471 bnez(t0, stub->entry(), /* is_far */ true);
2472
2473 bind(stub->continuation());
2474 }
2475
2476 static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
2477 C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
2478 #define __ masm.
2479 VectorRegister dst = stub.data<0>();
2480 VectorRegister src = stub.data<1>();
2481 VectorRegister vtmp = stub.data<2>();
2482 assert_different_registers(dst, src, vtmp);
2483
2484 __ bind(stub.entry());
2485
2486 // Active elements (NaNs) are marked in v0 mask register.
2487 // mul is already set to mf2 in float_to_float16_v.
2488
2489 // Float (32 bits)
2490 // Bit: 31 30 to 23 22 to 0
2491 // +---+------------------+-----------------------------+
2544 assert_different_registers(dst, src, vtmp);
2545
2546 auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2547 (dst, src, vtmp, 56, float_to_float16_v_slow_path);
2548
2549 // On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
2550
2551 vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
2552
2553 // check whether there is a NaN.
2554 // replace v_fclass with vmfne_vv as performance optimization.
2555 vmfne_vv(v0, src, src);
2556 vcpop_m(t0, v0);
2557
2558 vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2, tmp);
2559
2560 // For non-NaN cases, just use built-in instructions.
2561 vfncvt_f_f_w(dst, src);
2562
2563 // jump to stub processing NaN cases.
2564 bnez(t0, stub->entry(), /* is_far */ true);
2565
2566 bind(stub->continuation());
2567 }
2568
// Vector signum helper: rewrites `dst` in place so that every element that is
// neither a zero nor a NaN takes the value of the matching element of `one`
// with the element's own sign.
//   dst  - input vector and result vector
//   one  - vector supplying the magnitude (floating-point 1.0 per the comment below)
//   bt   - element type forwarded to vsetvli_helper
//   vlen - vector length forwarded to vsetvli_helper
// Overwrites v0 (used as the mask) and t0.
2569 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
2570 vsetvli_helper(bt, vlen);
2571
2572 // check if input is -0, +0, signaling NaN or quiet NaN
2573 vfclass_v(v0, dst);
2574 mv(t0, FClassBits::zero | FClassBits::nan);
2575 vand_vx(v0, v0, t0);
// Mask bit is set where no zero/NaN class bit matched, i.e. for the elements
// that must be replaced.
2576 vmseq_vi(v0, v0, 0);
2577
2578 // use floating-point 1.0 with a sign of input
// NOTE(review): masked-off elements (zeros and NaNs) are expected to keep their
// input value - confirm the mask policy configured by vsetvli_helper.
2579 vfsgnj_vv(dst, one, dst, v0_t);
2580 }
2581
2582 // j.l.Math.round(float)
2583 // Returns the closest int to the argument, with ties rounding to positive infinity.
2584 // We need to handle 3 special cases defined by java api spec:
|