/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.ch.iouring;

import jdk.internal.ffi.generated.iouring.*;

import java.io.IOException;
import java.lang.foreign.*;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.VarHandle;
import java.nio.ByteBuffer;
import java.time.Duration;

import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static sun.nio.ch.iouring.Util.strerror;
import static sun.nio.ch.iouring.Util.locateHandleFromLib;
import static sun.nio.ch.iouring.Util.locateStdHandle;
import static sun.nio.ch.iouring.Util.INT_POINTER;
import static jdk.internal.ffi.generated.iouring.iouring_h.*;
import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_REGISTER_EVENTFD;
import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_UNREGISTER_EVENTFD;

/**
 * Low-level, asynchronous interface to a Linux io_uring.
 * Requests are submitted through the {@link #submit(Sqe)} method.
 * Completion events can be awaited by calling {@link #enter(int, int, int)}.
 * Completions, represented by {@link Cqe}, are then obtained by calling
 * {@link #pollCompletion()}. Completions are linked to submissions by the
 * {@link Cqe#user_data()} field of the {@code Cqe}, which contains the
 * same 64-bit (long) value that was supplied in the submitted {@link Sqe}.
 * <p>
 * Some IOUringImpl operations work with kernel-registered direct ByteBuffers.
 * When creating an IOUringImpl instance, a number of these buffers can be
 * created in a pool. Registered buffers are not used with regular
 * IOUringImpl read/write operations.
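 * <p>
 * A minimal usage sketch (illustrative only; it assumes that the generated
 * header exposes {@code IORING_OP_NOP()} and that {@link Sqe} provides a
 * {@code user_data(long)} builder method):
 * <pre>{@code
 *     IOUringImpl ring = new IOUringImpl(16);
 *     ring.submit(new Sqe()
 *             .opcode(IORING_OP_NOP())
 *             .fd(-1)
 *             .user_data(42L));
 *     ring.enter(1, 1, 0);              // submit one request, wait for one completion
 *     Cqe cqe = ring.pollCompletion();
 *     assert cqe.user_data() == 42L;    // completion matched to its submission
 *     ring.close();
 * }</pre>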
 */
@SuppressWarnings("restricted")
public class IOUringImpl {
    private static final Arena arena = Arena.ofAuto();

    private static final boolean TRACE = System
        .getProperty("sun.nio.ch.iouring.trace", "false")
        .equalsIgnoreCase("true");
    private final SubmissionQueue sq;
    private final CompletionQueue cq;
    private final int fd;           // The ring fd
    private int epollfd = -1;       // The epoll(7) fd, if created
    private static final int INT_SIZE = (int)ValueLayout.JAVA_INT.byteSize();

    private final Arena autoArena = Arena.ofAuto();

    private final KMappedBuffers mappedBuffers;

    /**
     * Creates an IOURing and initializes the ring structures. {@code entries}
     * (or the next higher power of 2) is the size of the Submission Queue.
     * Currently, the Completion Queue returned will be double the size
     * of the Submission Queue.
     */
    public IOUringImpl(int entries) throws IOException {
        this(entries, 0, 0, -1);
    }

    /**
     * Creates an IOURing and initializes the ring structures.
     * @param sq_entries the number of submission queue entries to allocate
     * @param cq_entries the number of completion queue entries to allocate
     * @throws IOException if an I/O error occurs
     */
    public IOUringImpl(int sq_entries, int cq_entries) throws IOException {
        this(sq_entries, cq_entries, 0, -1);
    }

    /**
     * Creates an IOURing, initializes the ring structures, and allocates a
     * number of direct {@link ByteBuffer}s which are additionally mapped
     * into the kernel address space.
     *
     * @param sq_entries the number of submission queue entries to allocate
     * @param cq_entries the number of completion queue entries to allocate
     * @param nmappedBuffers number of mapped direct ByteBuffers to create
     * @param mappedBufsize size of each buffer in bytes
     * @throws IOException if an I/O error occurs
     */
    public IOUringImpl(int sq_entries,
                       int cq_entries,
                       int nmappedBuffers,
                       int mappedBufsize) throws IOException {
        MemorySegment params_seg = getSegmentFor(io_uring_params.$LAYOUT());
        if (cq_entries > 0) {
            io_uring_params.cq_entries(params_seg, cq_entries);
            int flags = io_uring_params.flags(params_seg) | IORING_SETUP_CQSIZE();
            io_uring_params.flags(params_seg, flags);
        }

        // call setup
        fd = io_uring_setup(sq_entries, params_seg);
        if (fd < 0) {
            throw new IOException(errorString(fd));
        }

        mappedBuffers = new KMappedBuffers(nmappedBuffers, mappedBufsize);
        if (nmappedBuffers > 0) {
            mappedBuffers.register(fd);
        }
        // Offsets segments
        MemorySegment cq_off_seg = io_uring_params.cq_off(params_seg);
        MemorySegment sq_off_seg = io_uring_params.sq_off(params_seg);

        // Offsets to the cqe array and the sqe index array
        int cq_off_cqes = io_cqring_offsets.cqes(cq_off_seg);
        int sq_off_array = io_sqring_offsets.array(sq_off_seg);

        // Actual number of entries in each queue
        sq_entries = io_uring_params.sq_entries(params_seg);
        cq_entries = io_uring_params.cq_entries(params_seg);

        int sq_size = sq_off_array + sq_entries * INT_SIZE;
        int cq_size = cq_off_cqes + cq_entries * (int)io_uring_cqe.sizeof();

        boolean singleMmap = (io_uring_params.features(params_seg)
                & IORING_FEAT_SINGLE_MMAP()) != 0;

        if (singleMmap) {
            if (cq_size > sq_size)
                sq_size = cq_size;
            cq_size = sq_size;
        }
        var sqe_seg = mmap(sq_size, fd, IORING_OFF_SQ_RING());

        MemorySegment cqes_seg;
        if (singleMmap) {
            cqes_seg = sqe_seg;
        } else {
            cqes_seg = mmap(cq_size, fd, IORING_OFF_CQ_RING());
        }

        // Ring masks
        int sq_mask = sqe_seg.get(ValueLayout.JAVA_INT,
                io_sqring_offsets.ring_mask(sq_off_seg));
        int cq_mask = cqes_seg.get(ValueLayout.JAVA_INT,
                io_cqring_offsets.ring_mask(cq_off_seg));

        var sqes = mmap(sq_entries * io_uring_sqe.sizeof(),
                fd, IORING_OFF_SQES());

        cq = new CompletionQueue(cqes_seg.asSlice(cq_off_cqes),
                cqes_seg.asSlice(io_cqring_offsets.head(cq_off_seg)),
                cqes_seg.asSlice(io_cqring_offsets.tail(cq_off_seg)),
                cq_mask);

        // The SQ head/tail pointers live in the SQ ring mapping
        sq = new SubmissionQueue(sqe_seg.asSlice(sq_off_array),
                sqe_seg.asSlice(io_sqring_offsets.head(sq_off_seg)),
                sqe_seg.asSlice(io_sqring_offsets.tail(sq_off_seg)),
                sq_mask,
                sqes);
        if (TRACE)
            System.out.printf("IOUringImpl: ringfd: %d\n", fd);
    }

    public void close() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            ret = (int)close_fn.invokeExact(ctx.errnoCaptureSegment(),
                    ringFd());
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
    }

    public int eventfd() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            ret = (int)eventfd_fn.invokeExact(ctx.errnoCaptureSegment(),
                    0, 0);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
        return ret;
    }

    private int initEpoll() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            // epoll_create(2) takes a single (ignored, but positive) size hint
            ret = (int)epoll_create_fn.invokeExact(ctx.errnoCaptureSegment(),
                    1);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
        return ret;
    }

    public void register_eventfd(int efd) throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        MemorySegment fdseg =
            arena.allocateFrom(ValueLayout.JAVA_INT, efd);

        try {
            ret = (int)evregister_fn
                .invokeExact(
                    ctx.errnoCaptureSegment(),
                    NR_io_uring_register,
                    fd, IORING_REGISTER_EVENTFD(),
                    fdseg, 1
                );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
    }

    public void unregister_eventfd() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();

        try {
            ret = (int)evregister_fn
                .invokeExact(
                    ctx.errnoCaptureSegment(),
                    NR_io_uring_register,
                    fd, IORING_UNREGISTER_EVENTFD(),
                    MemorySegment.NULL, 0
                );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
    }

    /**
     * Asynchronously submits an Sqe to this IOUringImpl. Can be called
     * multiple times before {@link #enter(int, int, int)}.
     *
     * @param sqe the submission queue entry to enqueue
     * @throws IOException if the Submission Queue is full
     */
    public void submit(Sqe sqe) throws IOException {
        sq.submit(sqe);
        if (TRACE)
            System.out.printf("submit: %s \n", sqe);
    }

    /**
     * Notifies the kernel of entries on the Submission Queue and waits for a
     * number of responses (completion events). If this returns normally
     * with value {@code n > 0}, then {@code n} requests have been accepted
     * by the kernel. A normal return also means that the requested number of
     * completion events has been received; {@link #pollCompletion()} can then
     * be called {@code nreceive} times to obtain the results.
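     * <p>
     * For example (an illustrative sketch, assuming three requests have
     * already been built and passed to {@link #submit(Sqe)}):
     * <pre>{@code
     *     int accepted = ring.enter(3, 1, 0);  // submit 3 sqes, wait for at least 1 completion
     *     Cqe cqe;
     *     while ((cqe = ring.pollCompletion()) != null) {
     *         // dispatch on cqe.user_data()
     *     }
     * }</pre>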
     *
     * @param nsubmit  number of requests to submit
     * @param nreceive block until this number of completion events has been received
     * @param flags    flags to pass to io_uring_enter
     *
     * @return a negative value if an error occurred; otherwise the number of
     *         Sqes successfully submitted
     */
    public int enter(int nsubmit, int nreceive, int flags) throws IOException {
        if (nreceive > 0) {
            flags |= IORING_ENTER_GETEVENTS();
        }
        return io_uring_enter(this.fd, nsubmit, nreceive, flags);
    }

    /**
     * Returns the allocated size of the Submission Queue. If the requested
     * size was not a power of 2, then the allocated size will be the next
     * highest power of 2.
     *
     * @return the allocated size of the Submission Queue
     */
    public int sqsize() {
        return sq.ringSize;
    }

    /**
     * Returns the number of free entries in the Submission Queue.
     */
    public int sqfree() {
        return sq.nFree();
    }

    /**
     * Returns whether the Completion Queue is empty.
     *
     * @return {@code true} if the Completion Queue is empty
     */
    public boolean cqempty() {
        return cq.nEntries() == 0;
    }

    /**
     * Returns the allocated size of the Completion Queue.
     * Currently, double the size of the Submission Queue.
     *
     * @return the allocated size of the Completion Queue
     */
    public int cqsize() {
        return cq.ringSize;
    }

    public int epoll_fd() {
        return epollfd;
    }

    /**
     * Polls the Completion Queue for results.
     *
     * @return a Cqe if available or {@code null}
     */
    public Cqe pollCompletion() {
        Cqe cqe = cq.pollHead();
        if (TRACE)
            System.out.printf("pollCompletion: -> %s\n", cqe);
        return cqe;
    }

    /**
     * Returns a String description of the given errno value.
     *
     * @param errno the errno value
     * @return a String describing the error
     */
    public static String strerror(int errno) {
        return Util.strerror(errno);
    }

    private static int io_uring_setup(int entries, MemorySegment params)
            throws IOException {
        try {
            return (int) setup_fn.invokeExact(NR_io_uring_setup,
                    entries, params);
        } catch (Throwable t) {
            throw ioexception(t);
        }
    }

    private static int io_uring_enter(int fd, int to_submit, int min_complete,
                                      int flags) throws IOException {
        try {
            return (int) enter_fn.invokeExact(NR_io_uring_enter,
                    fd, to_submit, min_complete, flags, MemorySegment.NULL);
        } catch (Throwable t) {
            throw ioexception(t);
        }
    }

    static IOException ioexception(Throwable t) {
        if (t instanceof IOException ioe) {
            return ioe;
        } else {
            return new IOException(t);
        }
    }

    int checkAndGetIndexFor(ByteBuffer buffer) {
        return mappedBuffers.checkAndGetIndexForBuffer(buffer);
    }

    /**
     * Returns a mapped direct ByteBuffer or {@code null} if none is available.
     * Mapped buffers must be used with some IOUringImpl operations such as
     * {@code IORING_OP_WRITE_FIXED} and {@code IORING_OP_READ_FIXED}.
     * Buffers must be returned after use with
     * {@link #returnRegisteredBuffer(ByteBuffer)}.
     *
     * @return a registered direct ByteBuffer, or {@code null} if none is available
     */
    public ByteBuffer getRegisteredBuffer() {
        return mappedBuffers.getRegisteredBuffer();
    }

    /**
     * Returns (releases) a previously acquired registered buffer to the pool.
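     * <p>
     * A typical lifecycle, sketched under the assumption that the buffer is
     * used with a fixed-buffer opcode such as {@code IORING_OP_WRITE_FIXED}:
     * <pre>{@code
     *     ByteBuffer buf = ring.getRegisteredBuffer();
     *     if (buf != null) {
     *         try {
     *             // fill buf, submit an Sqe referencing it, await completion
     *         } finally {
     *             ring.returnRegisteredBuffer(buf);
     *         }
     *     }
     * }</pre>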
     *
     * @param buffer the buffer to return to the pool
     */
    public void returnRegisteredBuffer(ByteBuffer buffer) {
        mappedBuffers.returnRegisteredBuffer(buffer);
    }

    /**
     * Common capabilities of SubmissionQueue and CompletionQueue
     */
    sealed class QueueImplBase permits SubmissionQueue, CompletionQueue {
        protected final MemorySegment ringSeg;
        private final MemorySegment head, tail;
        protected final int ringMask;
        protected final MemoryLayout ringLayout;
        protected final int ringLayoutSize;
        protected final int ringLayoutAlignment;
        protected final int ringSize;

        // For accessing head and tail with acquire/release semantics
        protected final VarHandle addrH;

        /**
         * @param ringSeg    The mapped ring segment
         * @param head       The head pointer
         * @param tail       The tail pointer
         * @param ringMask   The ring mask (ring size minus one)
         * @param ringLayout The layout of a single ring entry
         */
        QueueImplBase(MemorySegment ringSeg, MemorySegment head,
                      MemorySegment tail, int ringMask,
                      MemoryLayout ringLayout) {
            this.ringSeg = ringSeg;
            this.head = head;
            this.tail = tail;
            this.ringMask = ringMask;
            this.ringSize = ringMask + 1;
            this.ringLayout = ringLayout;
            this.ringLayoutSize = (int)ringLayout.byteSize();
            this.ringLayoutAlignment = (int)ringLayout.byteAlignment();
            this.addrH = ValueLayout.JAVA_INT.varHandle();
        }

        // Number of entries currently in the ring; the difference is
        // correct even when the 32-bit counters wrap around
        int nEntries() {
            return getTail(false) - getHead(false);
        }

        boolean ringFull() {
            return nEntries() == ringSize;
        }

        // Number of free (unclaimed) slots in the ring
        int nFree() {
            return ringSize - nEntries();
        }

        protected int getHead(boolean withAcquire) {
            return (int)(withAcquire
                    ? addrH.getAcquire(head, 0L) : addrH.get(head, 0L));
        }

        protected int getTail(boolean withAcquire) {
            return (int)(withAcquire
                    ? addrH.getAcquire(tail, 0L) : addrH.get(tail, 0L));
        }

        // Used by CompletionQueue
        protected void setHead(int val) {
            addrH.setRelease(head, 0L, val);
        }

        // Used by SubmissionQueue
        protected void setTail(int val) {
            addrH.setRelease(tail, 0L, val);
        }
    }

    final class SubmissionQueue extends QueueImplBase {
        final MemorySegment sqes;
        final int n_sqes;
        static final int sqe_layout_size =
            (int)io_uring_sqe.$LAYOUT().byteSize();

        static final int sqe_alignment =
            (int)io_uring_sqe.$LAYOUT().byteAlignment();

        SubmissionQueue(MemorySegment ringSeg, MemorySegment head,
                        MemorySegment tail, int mask, MemorySegment sqes) {
            super(ringSeg, head, tail, mask, ValueLayout.JAVA_INT);
            this.sqes = sqes;
            this.n_sqes = (int) (sqes.byteSize() / sqe_layout_size);
        }

        /**
         * Submits an Sqe to the Submission Queue.
         * @param sqe the entry to submit
         * @throws IOException if the queue is full
         */
        public void submit(Sqe sqe) throws IOException {
            if (ringFull()) {
                throw new IOException("Submission Queue full");
            }

            int tailVal = getTail(false);
            int tailIndex = tailVal & ringMask;

            MemorySegment slot = sqes.asSlice(
                    (long) tailIndex * sqe_layout_size,
                    sqe_layout_size, sqe_alignment).fill((byte)0);
            // Populate the slot as an io_uring_sqe.
            // Note: Sqe has already validated that overlapping union
            // fields are not both set.
            io_uring_sqe.user_data(slot, sqe.user_data());
            io_uring_sqe.fd(slot, sqe.fd());
            io_uring_sqe.opcode(slot, (byte)sqe.opcode());
            // The next statement handles the large flags union. For
            // simplicity, all __u32 variants are handled as xxx_flags;
            // poll_events (__u16) is handled separately.
            sqe.xxx_flags().ifPresentOrElse(
                u32 -> io_uring_sqe.open_flags(slot, u32),
                // xxx_flags not present; poll_events may be
                () -> sqe.poll_events().ifPresent(
                    u16 -> io_uring_sqe.poll_events(slot, (short)u16)));

            io_uring_sqe.flags(slot, (byte)sqe.flags());
            io_uring_sqe.addr(slot, sqe.addr()
                .orElse(MemorySegment.NULL).address());
            io_uring_sqe.addr2(slot, sqe.addr2()
                .orElse(MemorySegment.NULL).address());
            io_uring_sqe.buf_index(slot, (short)sqe.buf_index().orElse(0));
            io_uring_sqe.off(slot, sqe.off().orElse(0L));
            io_uring_sqe.len(slot, sqe.len().orElse(0));
            // Point the SQ index array entry at the sqe slot just filled
            ringSeg.setAtIndex(ValueLayout.JAVA_INT, tailIndex, tailIndex);
            setTail(++tailVal);
        }
    }

    final class CompletionQueue extends QueueImplBase {
        CompletionQueue(MemorySegment ringSeg, MemorySegment head,
                        MemorySegment tail, int mask) {
            super(ringSeg, head, tail, mask, io_uring_cqe.$LAYOUT());
        }

        public Cqe pollHead() {
            int headVal = getHead(false);
            if (headVal == getTail(true)) {
                return null;        // completion queue is empty
            }
            int index = headVal & ringMask;
            int offset = index * ringLayoutSize;
            MemorySegment seg = ringSeg.asSlice(offset,
                    ringLayoutSize, ringLayoutAlignment);
            Cqe res = new Cqe(
                    io_uring_cqe.user_data(seg),
                    io_uring_cqe.res(seg),
                    io_uring_cqe.flags(seg));
            setHead(headVal + 1);
            return res;
        }
    }

    /**
     * Adds the given fd to this ring's epoll(7) instance,
     * creating the epoll instance if it has not already been created.
     * <p>
     * If EPOLLONESHOT is set in {@code poll_events}, the {@code opaque} field
     * can be used to return the "id" of the specific operation that was
     * kicked off.
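     * <p>
     * For example (an illustrative sketch; {@code sockfd} and {@code id} are
     * caller-supplied, and the event-mask constants are assumed to be
     * available from the generated epoll bindings):
     * <pre>{@code
     *     ring.epoll_add(sockfd, EPOLLIN() | EPOLLONESHOT(), id);
     *     ring.enter(1, 0, 0);    // submit the IORING_OP_EPOLL_CTL request
     * }</pre>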
     *
     * @param fd target fd to manage
     * @param poll_events bit mask of events to activate
     * @param opaque a 64-bit value to return with event notifications.
     *               A value of -1L is ignored.
     * @throws IOException if the submission fails
     * @throws InterruptedException if the operation is interrupted
     */
    public void epoll_add(int fd, int poll_events, long opaque)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, opaque, EPOLL_CTL_ADD());
    }

    public void epoll_del(int fd, int poll_events)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, -1L, EPOLL_CTL_DEL());
    }

    public void epoll_mod(int fd, int poll_events, long opaque)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, opaque, EPOLL_CTL_MOD());
    }

    private void epoll_op(int fd, int poll_events, long opaque, int op)
            throws IOException, InterruptedException {
        if (this.epollfd == -1) {
            this.epollfd = initEpoll();
        }

        MemorySegment targetfd =
            arena.allocateFrom(ValueLayout.JAVA_INT, fd);

        Sqe request = new Sqe()
            .opcode(IORING_OP_EPOLL_CTL())
            .fd(epollfd)
            .addr(targetfd)
            .xxx_flags(poll_events)
            .len(op);

        if (opaque != -1L) {
            MemorySegment event = arena.allocate(epoll_event.$LAYOUT());
            epoll_event.events(event, poll_events);
            var dataSlice = epoll_event.data(event);
            epoll_data_t.u64(dataSlice, opaque);
            request = request.off(event.address());
        }
        submit(request);
    }

    static MemorySegment getSegmentFor(MemoryLayout layout) {
        return arena.allocate(layout.byteSize(), layout.byteAlignment())
                .fill((byte)0);
    }

    // Expects the negated (-errno) value returned by the io_uring calls
    static String errorString(int errno) {
        errno = -errno;
        return "Error: " + strerror(errno);
    }

    // This is obsolete. There is a better way of doing a timed
    // poll by providing a timespec to io_uring_enter.
    public Sqe getTimeoutSqe(Duration maxwait, int opcode, int completionCount) {
        MemorySegment seg =
            arena.allocate(__kernel_timespec.$LAYOUT()).fill((byte)0);

        __kernel_timespec.tv_sec(seg, maxwait.getSeconds());
        __kernel_timespec.tv_nsec(seg, maxwait.getNano());
        return new Sqe()
            .opcode(opcode)
            .addr(seg)
            .xxx_flags(0)              // timeout_flags
            .off(completionCount)
            .len(1);
    }

    private static final ValueLayout POINTER =
        ValueLayout.ADDRESS.withTargetLayout(
            MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)
        );

    private static final MethodHandle mmap_fn = locateStdHandle(
        "mmap", FunctionDescriptor.of(
            POINTER,                 // returned address
            ValueLayout.JAVA_LONG,   // input address, usually zero
            ValueLayout.JAVA_LONG,   // size_t
            ValueLayout.JAVA_INT,    // int prot (PROT_READ | PROT_WRITE)
            ValueLayout.JAVA_INT,    // int flags (MAP_SHARED | MAP_POPULATE)
            ValueLayout.JAVA_INT,    // int fd
            ValueLayout.JAVA_LONG    // off_t (64-bit offset)
        )
    );

    private static final MethodHandle epoll_create_fn = locateStdHandle(
        "epoll_create", FunctionDescriptor.of(
            ValueLayout.JAVA_INT,    // returned fd
            ValueLayout.JAVA_INT     // int size (ignored, must be > 0)
        ), SystemCallContext.errnoLinkerOption()
    );

    private static final MethodHandle close_fn = locateStdHandle(
        "close",
        FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT),
        SystemCallContext.errnoLinkerOption()
    );

    private static final MethodHandle eventfd_fn = locateStdHandle(
        "eventfd",
        FunctionDescriptor.of(
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT),
        SystemCallContext.errnoLinkerOption()
    );
    // Linux syscall numbers. These allow the system calls to be invoked
    // directly on systems where there are no wrappers for these functions
    // in libc or liburing. This also means liburing is no longer used.

    private static final int NR_io_uring_setup = 425;
    private static final int NR_io_uring_enter = 426;
    private static final int NR_io_uring_register = 427;

    private static final MethodHandle setup_fn = locateStdHandle(
        "syscall", FunctionDescriptor.of(
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.ADDRESS)
    );

    private static final MethodHandle enter_fn = locateStdHandle(
        "syscall", FunctionDescriptor.of(ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.ADDRESS)     // sigset_t, UNUSED for now
    );

    // io_uring_register, specifically for
    // IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD
    private static final MethodHandle evregister_fn = locateStdHandle(
        "syscall",
        FunctionDescriptor.of(ValueLayout.JAVA_INT, // result
            ValueLayout.JAVA_INT,   // syscall number
            ValueLayout.JAVA_INT,   // ring fd
            ValueLayout.JAVA_INT,   // opcode
            INT_POINTER,            // pointer to fd
            ValueLayout.JAVA_INT),  // integer value 1
        SystemCallContext.errnoLinkerOption()
    );

    // mmap constants used internally
    private static final int PROT_READ = 1;
    private static final int PROT_WRITE = 2;
    private static final int MAP_SHARED = 1;
    private static final int MAP_POPULATE = 0x8000;

    /**
     * offset (when mapping io_uring segments) must be one of:
     * jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQ_RING()
     * jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_CQ_RING()
     * jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQES()
     *
     * @param size   the number of bytes to map
     * @param fd     the ring fd
     * @param offset one of the IORING_OFF_* values above
     * @return the mapped segment, resized to {@code size} bytes
     */
    private static MemorySegment mmap(long size, int fd, long offset) {
        MemorySegment seg = null;
        try {
            seg = (MemorySegment)mmap_fn
                .invokeExact(0L, size,
                    PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_POPULATE,
                    fd,
                    offset
                );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        if (seg.address() == -1L) {     // MAP_FAILED ((void *) -1)
            throw new RuntimeException("mmap failed");
        }
        return seg.reinterpret(size);
    }

    int ringFd() {
        return fd;
    }
}