Sdiff src/hotspot/cpu/riscv/c2_MacroAssembler

src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp

  28 #include "opto/c2_MacroAssembler.hpp"
  29 #include "opto/compile.hpp"
  30 #include "opto/intrinsicnode.hpp"
  31 #include "opto/output.hpp"
  32 #include "opto/subnode.hpp"
  33 #include "runtime/objectMonitorTable.hpp"
  34 #include "runtime/stubRoutines.hpp"
  35 #include "runtime/synchronizer.hpp"
  36 #include "utilities/globalDefinitions.hpp"
  37 
  38 #ifdef PRODUCT
     // PRODUCT builds: BLOCK_COMMENT expands to nothing and STOP only calls stop().
  39 #define BLOCK_COMMENT(str) /* nothing */
  40 #define STOP(error) stop(error)
  41 #else
     // Non-PRODUCT builds: BLOCK_COMMENT forwards to block_comment(), and STOP
     // passes the message to block_comment() before calling stop().
     // NOTE(review): this STOP expands to two statements, so it must not be used
     // as the sole body of an un-braced if/else.
  42 #define BLOCK_COMMENT(str) block_comment(str)
  43 #define STOP(error) block_comment(error); stop(error)
  44 #endif
  45 
     // BIND binds the label and then hands the stringized label name followed by
     // ":" to BLOCK_COMMENT.
     // NOTE(review): BIND also expands to more than one statement; the same
     // un-braced if/else caveat applies.
  46 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  47 























  48 void C2_MacroAssembler::fast_lock(Register obj, Register box,
  49                                   Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
  50   // Flag register, zero for success; non-zero for failure.
  51   Register flag = t1;
  52 
  53   assert_different_registers(obj, box, tmp1, tmp2, tmp3, tmp4, flag, t0);
  54 
  55   mv(flag, 1);
  56 
  57   // Handle inflated monitor.
  58   Label inflated;
  59   // Finish fast lock successfully. MUST branch to with flag == 0
  60   Label locked;
  61   // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
  62   Label slow_path;
  63 
  64   if (UseObjectMonitorTable) {
  65     // Clear cache in case fast locking succeeds or we need to take the slow-path.
  66     sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
  67   }

2335 
2336   __ j(stub.continuation());
2337 #undef __
2338 }
2339 
2340 // j.l.Float.float16ToFloat
     // Converts the half-precision bit pattern held in the integer register src
     // into a single-precision value in dst.
     //   dst - destination floating-point register
     //   src - integer register holding the 16-bit half-precision encoding
     //   tmp - integer scratch register; also handed to the slow-path stub
     // t0 is used as an additional scratch register.
     // Inputs with all exponent bits set (NaN or +/- Inf) are diverted to the
     // out-of-line stub float16_to_float_slow_path, which rejoins this code at
     // the stub continuation label bound at the end of this function.
2341 void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
       // NOTE(review): 20 is presumably the size budget reserved for the stub code;
       // confirm against C2CodeStub::make.
2342   auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);
2343 
2344   // On riscv, NaN needs a special process as fcvt does not work in that case.
2345   // On riscv, Inf does not need a special process as fcvt can handle it correctly.
2346   // but we consider to get the slow path to process NaN and Inf at the same time,
2347   // as both of them are rare cases, and if we try to get the slow path to handle
2348   // only NaN case it would sacrifice the performance for normal cases,
2349   // i.e. non-NaN and non-Inf cases.
2350 
2351   // check whether it's a NaN or +/- Inf.
       // 0x7c00 masks bits 14..10, the exponent field of the half-precision encoding;
       // tmp equals the mask only when every exponent bit is set.
2352   mv(t0, 0x7c00);
2353   andr(tmp, src, t0);
2354   // jump to stub processing NaN and Inf cases (the trailing true is the is_far argument).
2355   beq(t0, tmp, stub->entry(), true);
2356 
2357   // non-NaN or non-Inf cases, just use built-in instructions.
       // Move the raw bits into dst, then widen half -> single in place.
2358   fmv_h_x(dst, src);
2359   fcvt_s_h(dst, dst);
2360 
       // The slow-path stub jumps back to this point.
2361   bind(stub->continuation());
2362 }
2363 
2364 static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Register, FloatRegister, Register>& stub) {
       // Out-of-line stub body used by float_to_float16 for NaN inputs: binds the
       // stub entry, delegates the conversion to float_to_float16_NaN, then jumps
       // back to the stub continuation label.
2365 #define __ masm.
       // Operands, in the same order as the (dst, src, xtmp) triple passed to
       // C2CodeStub::make in float_to_float16.
2366   Register dst = stub.data<0>();
2367   FloatRegister src = stub.data<1>();
2368   Register tmp = stub.data<2>();
2369   __ bind(stub.entry());
2370 
       // t0 and tmp are passed as the two trailing register arguments.
2371   __ float_to_float16_NaN(dst, src, t0, tmp);
2372 
2373   __ j(stub.continuation());
2374 #undef __
2375 }
2376 
2377 // j.l.Float.floatToFloat16
     // Converts the single-precision value in src into a half-precision bit
     // pattern placed in the integer register dst.
     //   dst  - destination integer register
     //   src  - source floating-point register
     //   ftmp - floating-point scratch register holding the converted half value
     //   xtmp - integer scratch register handed to the slow-path stub
     // t0 is used as an additional scratch register.
     // NaN inputs are diverted to the out-of-line stub float_to_float16_slow_path,
     // which rejoins this code at the stub continuation label.
2378 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
       // NOTE(review): 64 is presumably the size budget reserved for the stub code;
       // confirm against C2CodeStub::make.
2379   auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
2380 
2381   // On riscv, NaN needs a special process as fcvt does not work in that case.
2382 
2383   // check whether it's a NaN.
2384   // replace fclass with feq as performance optimization.
       // src is compared with itself; t0 is zero only when src is a NaN.
2385   feq_s(t0, src, src);
2386   // jump to stub processing NaN cases (the trailing true is the is_far argument).
2387   beqz(t0, stub->entry(), true);
2388 
2389   // non-NaN cases, just use built-in instructions.
       // Narrow single -> half into ftmp, then move the raw bits into dst.
2390   fcvt_h_s(ftmp, src);
2391   fmv_x_h(dst, ftmp);
2392 
       // The slow-path stub jumps back to this point.
2393   bind(stub->continuation());
2394 }
2395 
2396 static void float16_to_float_v_slow_path(C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint>& stub) {
2397 #define __ masm.
2398   VectorRegister dst = stub.data<0>();
2399   VectorRegister src = stub.data<1>();
2400   uint vector_length = stub.data<2>();
2401   __ bind(stub.entry());
2402 
2403   // following instructions mainly focus on NaN, as riscv does not handle
2404   // NaN well with vfwcvt_f_f_v, but the code also works for Inf at the same time.
2405   //
2406   // construct NaN's in 32 bits from the NaN's in 16 bits,
2407   // we need the payloads of non-canonical NaNs to be preserved.

2428   // On riscv, NaN needs a special process as vfwcvt_f_f_v does not work in that case.
2429   // On riscv, Inf does not need a special process as vfwcvt_f_f_v can handle it correctly.
2430   // but we consider to get the slow path to process NaN and Inf at the same time,
2431   // as both of them are rare cases, and if we try to get the slow path to handle
2432   // only NaN case it would sacrifice the performance for normal cases,
2433   // i.e. non-NaN and non-Inf cases.
2434 
2435   vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2);
2436 
2437   // check whether there is a NaN or +/- Inf.
2438   mv(t0, 0x7c00);
2439   vand_vx(v0, src, t0);
2440   // v0 will be used as mask in slow path.
2441   vmseq_vx(v0, v0, t0);
2442   vcpop_m(t0, v0);
2443 
2444   // For non-NaN or non-Inf cases, just use built-in instructions.
2445   vfwcvt_f_f_v(dst, src);
2446 
2447   // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide.
2448   bnez(t0, stub->entry(), true);
2449 
2450   bind(stub->continuation());
2451 }
2452 
2453 static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
2454                                          C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
2455 #define __ masm.
2456   VectorRegister dst = stub.data<0>();
2457   VectorRegister src = stub.data<1>();
2458   VectorRegister vtmp = stub.data<2>();
2459   assert_different_registers(dst, src, vtmp);
2460 
2461   __ bind(stub.entry());
2462 
2463   // Active elements (NaNs) are marked in v0 mask register.
2464   // mul is already set to mf2 in float_to_float16_v.
2465 
2466   //  Float (32 bits)
2467   //    Bit:     31        30 to 23          22 to 0
2468   //          +---+------------------+-----------------------------+

2521   assert_different_registers(dst, src, vtmp);
2522 
2523   auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2524               (dst, src, vtmp, 56, float_to_float16_v_slow_path);
2525 
2526   // On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
2527 
2528   vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
2529 
2530   // check whether there is a NaN.
2531   // replace v_fclass with vmfne_vv as performance optimization.
2532   vmfne_vv(v0, src, src);
2533   vcpop_m(t0, v0);
2534 
2535   vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2, tmp);
2536 
2537   // For non-NaN cases, just use built-in instructions.
2538   vfncvt_f_f_w(dst, src);
2539 
2540   // jump to stub processing NaN cases.
2541   bnez(t0, stub->entry(), true);
2542 
2543   bind(stub->continuation());
2544 }
2545 
2546 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
       // Vector signum computed in place on dst.
       //   dst  - input vector, overwritten with the result
       //   one  - vector expected to hold floating-point 1.0 (per the comment below)
       //   bt   - element type used to configure the vector unit
       //   vlen - vector length passed to vsetvli_helper
       // Uses v0 as the mask register and t0 as a scratch register.
2547   vsetvli_helper(bt, vlen);
2548 
2549   // check if input is -0, +0, signaling NaN or quiet NaN
       // Classify each element, keep only the zero/NaN class bits, and build a mask
       // in v0 that is set for elements that are neither zero nor NaN.
2550   vfclass_v(v0, dst);
2551   mv(t0, FClassBits::zero | FClassBits::nan);
2552   vand_vx(v0, v0, t0);
2553   vmseq_vi(v0, v0, 0);
2554 
2555   // use floating-point 1.0 with a sign of input
       // Masked by v0, so only the non-zero, non-NaN elements are rewritten.
       // NOTE(review): zero and NaN elements are presumably left as they were,
       // which depends on the mask policy set by vsetvli_helper - confirm.
2556   vfsgnj_vv(dst, one, dst, v0_t);
2557 }
2558 
2559 // j.l.Math.round(float)
2560 //  Returns the closest int to the argument, with ties rounding to positive infinity.
2561 // We need to handle 3 special cases defined by java api spec:

  28 #include "opto/c2_MacroAssembler.hpp"
  29 #include "opto/compile.hpp"
  30 #include "opto/intrinsicnode.hpp"
  31 #include "opto/output.hpp"
  32 #include "opto/subnode.hpp"
  33 #include "runtime/objectMonitorTable.hpp"
  34 #include "runtime/stubRoutines.hpp"
  35 #include "runtime/synchronizer.hpp"
  36 #include "utilities/globalDefinitions.hpp"
  37 
  38 #ifdef PRODUCT
     // PRODUCT builds: BLOCK_COMMENT expands to nothing and STOP only calls stop().
  39 #define BLOCK_COMMENT(str) /* nothing */
  40 #define STOP(error) stop(error)
  41 #else
     // Non-PRODUCT builds: BLOCK_COMMENT forwards to block_comment(), and STOP
     // passes the message to block_comment() before calling stop().
     // NOTE(review): this STOP expands to two statements, so it must not be used
     // as the sole body of an un-braced if/else.
  42 #define BLOCK_COMMENT(str) block_comment(str)
  43 #define STOP(error) block_comment(error); stop(error)
  44 #endif
  45 
     // BIND binds the label and then hands the stringized label name followed by
     // ":" to BLOCK_COMMENT.
     // NOTE(review): BIND also expands to more than one statement; the same
     // un-braced if/else caveat applies.
  46 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  47 
  48 void C2_MacroAssembler::entry_barrier() {
       // Emits the nmethod entry barrier through the BarrierSetAssembler of the
       // current barrier set. The barrier is given three labels (slow path,
       // continuation, guard). While the compiler is only measuring code size,
       // local dummy labels are used; otherwise the labels come from a freshly
       // created C2EntryBarrierStub that is registered with the output phase.
  49   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  50   // Dummy labels for just measuring the code size
  51   Label dummy_slow_path;
  52   Label dummy_continuation;
  53   Label dummy_guard;
  54   Label* slow_path = &dummy_slow_path;
  55   Label* continuation = &dummy_continuation;
  56   Label* guard = &dummy_guard;
  57 
  58   if (!Compile::current()->output()->in_scratch_emit_size()) {
  59     // Use real labels from actual stub when not emitting code for the purpose of measuring its size
         // The stub is allocated in the compilation arena and added to the output
         // stub list before its labels are taken.
  60     C2EntryBarrierStub* stub = new (Compile::current()->comp_arena()) C2EntryBarrierStub();
  61     Compile::current()->output()->add_stub(stub);
  62     slow_path = &stub->entry();
  63     continuation = &stub->continuation();
  64     guard = &stub->guard();
  65   }
  66 
  67   // In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
  68   bs->nmethod_entry_barrier(this, slow_path, continuation, guard);
  69 }
  70 
  71 void C2_MacroAssembler::fast_lock(Register obj, Register box,
  72                                   Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
  73   // Flag register, zero for success; non-zero for failure.
  74   Register flag = t1;
  75 
  76   assert_different_registers(obj, box, tmp1, tmp2, tmp3, tmp4, flag, t0);
  77 
  78   mv(flag, 1);
  79 
  80   // Handle inflated monitor.
  81   Label inflated;
  82   // Finish fast lock successfully. MUST branch to with flag == 0
  83   Label locked;
  84   // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
  85   Label slow_path;
  86 
  87   if (UseObjectMonitorTable) {
  88     // Clear cache in case fast locking succeeds or we need to take the slow-path.
  89     sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
  90   }

2358 
2359   __ j(stub.continuation());
2360 #undef __
2361 }
2362 
2363 // j.l.Float.float16ToFloat
     // Converts the half-precision bit pattern held in the integer register src
     // into a single-precision value in dst.
     //   dst - destination floating-point register
     //   src - integer register holding the 16-bit half-precision encoding
     //   tmp - integer scratch register; also handed to the slow-path stub
     // t0 is used as an additional scratch register.
     // Inputs with all exponent bits set (NaN or +/- Inf) are diverted to the
     // out-of-line stub float16_to_float_slow_path, which rejoins this code at
     // the stub continuation label bound at the end of this function.
2364 void C2_MacroAssembler::float16_to_float(FloatRegister dst, Register src, Register tmp) {
       // NOTE(review): 20 is presumably the size budget reserved for the stub code;
       // confirm against C2CodeStub::make.
2365   auto stub = C2CodeStub::make<FloatRegister, Register, Register>(dst, src, tmp, 20, float16_to_float_slow_path);
2366 
2367   // On riscv, NaN needs a special process as fcvt does not work in that case.
2368   // On riscv, Inf does not need a special process as fcvt can handle it correctly.
2369   // but we consider to get the slow path to process NaN and Inf at the same time,
2370   // as both of them are rare cases, and if we try to get the slow path to handle
2371   // only NaN case it would sacrifice the performance for normal cases,
2372   // i.e. non-NaN and non-Inf cases.
2373 
2374   // check whether it's a NaN or +/- Inf.
       // 0x7c00 masks bits 14..10, the exponent field of the half-precision encoding;
       // tmp equals the mask only when every exponent bit is set.
2375   mv(t0, 0x7c00);
2376   andr(tmp, src, t0);
2377   // jump to stub processing NaN and Inf cases.
2378   beq(t0, tmp, stub->entry(), /* is_far */ true);
2379 
2380   // non-NaN or non-Inf cases, just use built-in instructions.
       // Move the raw bits into dst, then widen half -> single in place.
2381   fmv_h_x(dst, src);
2382   fcvt_s_h(dst, dst);
2383 
       // The slow-path stub jumps back to this point.
2384   bind(stub->continuation());
2385 }
2386 
2387 static void float_to_float16_slow_path(C2_MacroAssembler& masm, C2GeneralStub<Register, FloatRegister, Register>& stub) {
       // Out-of-line stub body used by float_to_float16 for NaN inputs: binds the
       // stub entry, delegates the conversion to float_to_float16_NaN, then jumps
       // back to the stub continuation label.
2388 #define __ masm.
       // Operands, in the same order as the (dst, src, xtmp) triple passed to
       // C2CodeStub::make in float_to_float16.
2389   Register dst = stub.data<0>();
2390   FloatRegister src = stub.data<1>();
2391   Register tmp = stub.data<2>();
2392   __ bind(stub.entry());
2393 
       // t0 and tmp are passed as the two trailing register arguments.
2394   __ float_to_float16_NaN(dst, src, t0, tmp);
2395 
2396   __ j(stub.continuation());
2397 #undef __
2398 }
2399 
2400 // j.l.Float.floatToFloat16
     // Converts the single-precision value in src into a half-precision bit
     // pattern placed in the integer register dst.
     //   dst  - destination integer register
     //   src  - source floating-point register
     //   ftmp - floating-point scratch register holding the converted half value
     //   xtmp - integer scratch register handed to the slow-path stub
     // t0 is used as an additional scratch register.
     // NaN inputs are diverted to the out-of-line stub float_to_float16_slow_path,
     // which rejoins this code at the stub continuation label.
2401 void C2_MacroAssembler::float_to_float16(Register dst, FloatRegister src, FloatRegister ftmp, Register xtmp) {
       // NOTE(review): 64 is presumably the size budget reserved for the stub code;
       // confirm against C2CodeStub::make.
2402   auto stub = C2CodeStub::make<Register, FloatRegister, Register>(dst, src, xtmp, 64, float_to_float16_slow_path);
2403 
2404   // On riscv, NaN needs a special process as fcvt does not work in that case.
2405 
2406   // check whether it's a NaN.
2407   // replace fclass with feq as performance optimization.
       // src is compared with itself; t0 is zero only when src is a NaN.
2408   feq_s(t0, src, src);
2409   // jump to stub processing NaN cases.
2410   beqz(t0, stub->entry(), /* is_far */ true);
2411 
2412   // non-NaN cases, just use built-in instructions.
       // Narrow single -> half into ftmp, then move the raw bits into dst.
2413   fcvt_h_s(ftmp, src);
2414   fmv_x_h(dst, ftmp);
2415 
       // The slow-path stub jumps back to this point.
2416   bind(stub->continuation());
2417 }
2418 
2419 static void float16_to_float_v_slow_path(C2_MacroAssembler& masm, C2GeneralStub<VectorRegister, VectorRegister, uint>& stub) {
2420 #define __ masm.
2421   VectorRegister dst = stub.data<0>();
2422   VectorRegister src = stub.data<1>();
2423   uint vector_length = stub.data<2>();
2424   __ bind(stub.entry());
2425 
2426   // following instructions mainly focus on NaN, as riscv does not handle
2427   // NaN well with vfwcvt_f_f_v, but the code also works for Inf at the same time.
2428   //
2429   // construct NaN's in 32 bits from the NaN's in 16 bits,
2430   // we need the payloads of non-canonical NaNs to be preserved.

2451   // On riscv, NaN needs a special process as vfwcvt_f_f_v does not work in that case.
2452   // On riscv, Inf does not need a special process as vfwcvt_f_f_v can handle it correctly.
2453   // but we consider to get the slow path to process NaN and Inf at the same time,
2454   // as both of them are rare cases, and if we try to get the slow path to handle
2455   // only NaN case it would sacrifice the performance for normal cases,
2456   // i.e. non-NaN and non-Inf cases.
2457 
2458   vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2);
2459 
2460   // check whether there is a NaN or +/- Inf.
2461   mv(t0, 0x7c00);
2462   vand_vx(v0, src, t0);
2463   // v0 will be used as mask in slow path.
2464   vmseq_vx(v0, v0, t0);
2465   vcpop_m(t0, v0);
2466 
2467   // For non-NaN or non-Inf cases, just use built-in instructions.
2468   vfwcvt_f_f_v(dst, src);
2469 
2470   // jump to stub processing NaN and Inf cases if there is any of them in the vector-wide.
2471   bnez(t0, stub->entry(), /* is_far */ true);
2472 
2473   bind(stub->continuation());
2474 }
2475 
2476 static void float_to_float16_v_slow_path(C2_MacroAssembler& masm,
2477                                          C2GeneralStub<VectorRegister, VectorRegister, VectorRegister>& stub) {
2478 #define __ masm.
2479   VectorRegister dst = stub.data<0>();
2480   VectorRegister src = stub.data<1>();
2481   VectorRegister vtmp = stub.data<2>();
2482   assert_different_registers(dst, src, vtmp);
2483 
2484   __ bind(stub.entry());
2485 
2486   // Active elements (NaNs) are marked in v0 mask register.
2487   // mul is already set to mf2 in float_to_float16_v.
2488 
2489   //  Float (32 bits)
2490   //    Bit:     31        30 to 23          22 to 0
2491   //          +---+------------------+-----------------------------+

2544   assert_different_registers(dst, src, vtmp);
2545 
2546   auto stub = C2CodeStub::make<VectorRegister, VectorRegister, VectorRegister>
2547               (dst, src, vtmp, 56, float_to_float16_v_slow_path);
2548 
2549   // On riscv, NaN needs a special process as vfncvt_f_f_w does not work in that case.
2550 
2551   vsetvli_helper(BasicType::T_FLOAT, vector_length, Assembler::m1);
2552 
2553   // check whether there is a NaN.
2554   // replace v_fclass with vmfne_vv as performance optimization.
2555   vmfne_vv(v0, src, src);
2556   vcpop_m(t0, v0);
2557 
2558   vsetvli_helper(BasicType::T_SHORT, vector_length, Assembler::mf2, tmp);
2559 
2560   // For non-NaN cases, just use built-in instructions.
2561   vfncvt_f_f_w(dst, src);
2562 
2563   // jump to stub processing NaN cases.
2564   bnez(t0, stub->entry(), /* is_far */ true);
2565 
2566   bind(stub->continuation());
2567 }
2568 
2569 void C2_MacroAssembler::signum_fp_v(VectorRegister dst, VectorRegister one, BasicType bt, int vlen) {
       // Vector signum computed in place on dst.
       //   dst  - input vector, overwritten with the result
       //   one  - vector expected to hold floating-point 1.0 (per the comment below)
       //   bt   - element type used to configure the vector unit
       //   vlen - vector length passed to vsetvli_helper
       // Uses v0 as the mask register and t0 as a scratch register.
2570   vsetvli_helper(bt, vlen);
2571 
2572   // check if input is -0, +0, signaling NaN or quiet NaN
       // Classify each element, keep only the zero/NaN class bits, and build a mask
       // in v0 that is set for elements that are neither zero nor NaN.
2573   vfclass_v(v0, dst);
2574   mv(t0, FClassBits::zero | FClassBits::nan);
2575   vand_vx(v0, v0, t0);
2576   vmseq_vi(v0, v0, 0);
2577 
2578   // use floating-point 1.0 with a sign of input
       // Masked by v0, so only the non-zero, non-NaN elements are rewritten.
       // NOTE(review): zero and NaN elements are presumably left as they were,
       // which depends on the mask policy set by vsetvli_helper - confirm.
2579   vfsgnj_vv(dst, one, dst, v0_t);
2580 }
2581 
2582 // j.l.Math.round(float)
2583 //  Returns the closest int to the argument, with ties rounding to positive infinity.
2584 // We need to handle 3 special cases defined by java api spec:

< prev index next >