/*
 * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.ch.iouring;

import jdk.internal.ffi.generated.iouring.*;

import java.io.IOException;
import java.lang.foreign.*;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.VarHandle;
import java.nio.ByteBuffer;
import java.time.Duration;

import static java.lang.foreign.ValueLayout.JAVA_BYTE;
import static sun.nio.ch.iouring.Util.strerror;
import static sun.nio.ch.iouring.Util.locateHandleFromLib;
import static sun.nio.ch.iouring.Util.locateStdHandle;
import static sun.nio.ch.iouring.Util.INT_POINTER;
import static jdk.internal.ffi.generated.iouring.iouring_h.*;
import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_REGISTER_EVENTFD;
import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_UNREGISTER_EVENTFD;

/**
 * Low level interface to a Linux io_uring. It provides an asynchronous
 * interface. Requests are submitted through the {@link #submit(Sqe)} method.
 * Completion events can be awaited by calling {@link #enter(int, int, int)}.
 * Completions represented by {@link Cqe} are then obtained by calling
 * {@link #pollCompletion()}. Completions are linked to submissions by the
 * {@link Cqe#user_data()} field of the {@code Cqe} which contains the
 * same 64-bit (long) value that was supplied in the submitted {@link Sqe}.
 * <p>
 * Some IOUringImpl operations work with kernel registered direct ByteBuffers.
 * When creating an IOUringImpl instance, a number of these buffers can be
 * created in a pool. Registered buffers are not used with regular
 * IOUringImpl read/write operations.
 */
@SuppressWarnings("restricted")
public class IOUringImpl {
    // Shared, never explicitly closed arena used for all native allocations
    // made by this class (setup params, epoll events, timespecs, ...).
    private static final Arena arena = Arena.ofAuto();

    // Enables printf tracing when -Dsun.nio.ch.iouring.trace=true
    private static final boolean TRACE = System
            .getProperty("sun.nio.ch.iouring.trace", "false")
            .equalsIgnoreCase("true");
    private final SubmissionQueue sq;
    private final CompletionQueue cq;
    private final int fd;               // The ringfd
    private int epollfd = -1;           // The epoll(7) if set
    private static final int INT_SIZE = (int)ValueLayout.JAVA_INT.byteSize();

    private final Arena autoArena = Arena.ofAuto();

    private final KMappedBuffers mappedBuffers;

    /**
     * Creates an IOURing and initializes the ring structures. {@code entries}
     * (or the next higher power of 2) is the size of the Submission Queue.
     * Currently, the completion queue returned will be double the size
     * of the Submission queue.
     *
     * @param entries the requested number of submission queue entries
     * @throws IOException if the ring cannot be set up
     */
    public IOUringImpl(int entries) throws IOException {
        this(entries, 0, 0);
    }

    /**
     * Creates an IOURing and initializes the ring structures.
     * No kernel mapped buffers are created.
     *
     * @param sq_entries the number of submission queue entries to allocate
     * @param cq_entries the number of completion queue entries to allocate
     * @param flags io_uring_params flags
     * @throws IOException if an IOException occurs
     */
    public IOUringImpl(int sq_entries, int cq_entries, int flags) throws IOException {
        // Forward the caller's flags; they were previously dropped (0 was passed).
        this(sq_entries, cq_entries, flags, 0, -1);
    }

    /**
     * Creates an IOURing initializes the ring structures and allocates a
     * number of direct {@link ByteBuffer}s which are additionally mapped
     * into the kernel address space.
     *
     * @param sq_entries the number of submission queue entries to allocate
     * @param cq_entries the number of completion queue entries to allocate;
     *                   if greater than zero {@code IORING_SETUP_CQSIZE} is
     *                   added to {@code flags}
     * @param flags io_uring_params flags
     * @param nmappedBuffers number of mapped direct ByteBuffers to create
     * @param mappedBufsize size of each buffer in bytes
     * @throws IOException if an IOException occurs
     */
    public IOUringImpl(int sq_entries,
                       int cq_entries,
                       int flags,
                       int nmappedBuffers,
                       int mappedBufsize) throws IOException {
        MemorySegment params_seg = getSegmentFor(io_uring_params.$LAYOUT());

        if (cq_entries > 0) {
            io_uring_params.cq_entries(params_seg, cq_entries);
            flags |= IORING_SETUP_CQSIZE();
        }

        if (flags != 0) {
            io_uring_params.flags(params_seg, flags);
        }

        // call setup
        fd = io_uring_setup(sq_entries, params_seg);
        if (fd < 0) {
            throw new IOException(errorString(fd));
        }

        mappedBuffers = new KMappedBuffers(nmappedBuffers, mappedBufsize);
        if (nmappedBuffers > 0) {
            mappedBuffers.register(fd);
        }
        // Offsets segments
        MemorySegment cq_off_seg = io_uring_params.cq_off(params_seg);
        MemorySegment sq_off_seg = io_uring_params.sq_off(params_seg);

        // Offsets to cqe array and the sqe index array
        int cq_off_cqes = io_cqring_offsets.cqes(cq_off_seg);
        int sq_off_array = io_sqring_offsets.array(sq_off_seg);

        // Actual number of entries in each Q
        sq_entries = io_uring_params.sq_entries(params_seg);
        cq_entries = io_uring_params.cq_entries(params_seg);

        int sq_size = sq_off_array + sq_entries * INT_SIZE;
        int cq_size = cq_off_cqes + cq_entries * (int)io_uring_cqe.sizeof();

        boolean singleMmap = (io_uring_params.features(params_seg)
                & IORING_FEAT_SINGLE_MMAP()) != 0;

        if (singleMmap) {
            // Both rings share one mapping: size it for the larger of the two
            if (cq_size > sq_size)
                sq_size = cq_size;
            cq_size = sq_size;
        }
        var sqe_seg = mmap(sq_size, fd, IORING_OFF_SQ_RING());

        MemorySegment cqes_seg;
        if (singleMmap) {
            cqes_seg = sqe_seg;
        } else {
            cqes_seg = mmap(cq_size, fd, IORING_OFF_CQ_RING());
        }

        // Masks
        int sq_mask = sqe_seg.get(ValueLayout.JAVA_INT,
                io_sqring_offsets.ring_mask(sq_off_seg));
        int cq_mask = cqes_seg.get(ValueLayout.JAVA_INT,
                io_cqring_offsets.ring_mask(cq_off_seg));

        var sqes = mmap(sq_entries * io_uring_sqe.sizeof(),
                fd, IORING_OFF_SQES());

        cq = new CompletionQueue(cqes_seg.asSlice(cq_off_cqes),
                cqes_seg.asSlice(io_cqring_offsets.head(cq_off_seg)),
                cqes_seg.asSlice(io_cqring_offsets.tail(cq_off_seg)),
                cq_mask);

        // The SQ head/tail live in the SQ ring mapping at the offsets given
        // by sq_off. (Previously these were sliced out of the CQ mapping via
        // the CQ offset accessors, which is only valid with a single mmap.)
        sq = new SubmissionQueue(sqe_seg.asSlice(sq_off_array),
                sqe_seg.asSlice(io_sqring_offsets.head(sq_off_seg)),
                sqe_seg.asSlice(io_sqring_offsets.tail(sq_off_seg)),
                sq_mask,
                sqes);
        if (TRACE)
            System.out.printf("IOUringImpl: ringfd: %d\n", fd);
    }


    /**
     * Closes the ring file descriptor.
     *
     * @throws IOException if the underlying {@code close} call reports an error
     */
    public void close() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            ret = (int)close_fn.invokeExact(ctx.errnoCaptureSegment(),
                    ringFd());
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);

    }

    /**
     * Creates a new eventfd with an initial value of 0 and no flags.
     *
     * @return the new eventfd file descriptor
     * @throws IOException if the underlying {@code eventfd} call reports an error
     */
    public int eventfd() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            ret = (int)eventfd_fn.invokeExact(ctx.errnoCaptureSegment(),
                    0, 0);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
        return ret;
    }

    /**
     * Creates the epoll instance used by the {@code epoll_*} methods.
     *
     * @return the new epoll file descriptor
     * @throws IOException if the underlying {@code epoll_create} call reports an error
     */
    private int initEpoll() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        try {
            // epoll_create_fn is declared with a single int parameter (the
            // size hint); pass exactly one argument so invokeExact matches.
            ret = (int)epoll_create_fn.invokeExact(ctx.errnoCaptureSegment(),
                    1);
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
        return ret;
    }

    /**
     * Registers the given eventfd with this ring using
     * {@code IORING_REGISTER_EVENTFD}.
     *
     * @param efd the eventfd file descriptor to register
     * @throws IOException if the register call reports an error
     */
    public void register_eventfd(int efd) throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();
        MemorySegment fdseg =
                arena.allocateFrom(ValueLayout.JAVA_INT, efd);

        try {
            ret = (int)evregister_fn
                    .invokeExact(
                            ctx.errnoCaptureSegment(),
                            NR_io_uring_register,
                            fd, IORING_REGISTER_EVENTFD(),
                            fdseg, 1
                    );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);
    }

    /**
     * Unregisters the eventfd from this ring using
     * {@code IORING_UNREGISTER_EVENTFD}.
     *
     * @throws IOException if the register call reports an error
     */
    public void unregister_eventfd() throws IOException {
        int ret;
        SystemCallContext ctx = SystemCallContext.get();

        try {
            ret = (int)evregister_fn
                    .invokeExact(
                            ctx.errnoCaptureSegment(),
                            NR_io_uring_register,
                            fd, IORING_UNREGISTER_EVENTFD(),
                            MemorySegment.NULL, 0
                    );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        ctx.throwIOExceptionOnError(ret);

    }

    /**
     * Asynchronously submits an Sqe to this IOUringImpl. Can be called
     * multiple times before enter().
     *
     * @param sqe the submission queue entry to add to the Submission Q
     * @throws IOException if submission q full
     */
    public void submit(Sqe sqe) throws IOException {
        sq.submit(sqe);
        if (TRACE)
            System.out.printf("submit: %s \n", sqe);
    }

    /**
     * Notifies the kernel of entries on the Submission Q and waits for a
     * number of responses (completion events). If this returns normally
     * with value {@code n > 0}, this means that n requests have been accepted
     * by the kernel. A normal return also means that the requested number of
     * completion events have been received. {@link #pollCompletion()} can be
     * called {@code nreceive} times to obtain the results.
     *
     * @param nsubmit number of requests to submit
     * @param nreceive block until this number of events received; if greater
     *                 than zero {@code IORING_ENTER_GETEVENTS} is added to
     *                 {@code flags}
     * @param flags flags to pass to io_uring_enter
     *
     * @return if return value less than 0 means an error occurred. Otherwise,
     *         the number of Sqes successfully submitted.
     * @throws IOException if invoking the system call fails
     */
    public int enter(int nsubmit, int nreceive, int flags) throws IOException {
        if (nreceive > 0) {
            flags |= IORING_ENTER_GETEVENTS();
        }
        return io_uring_enter(this.fd, nsubmit, nreceive, flags);
    }

    /**
     * Returns the allocated size of the Submission Q. If the requested size
     * was not a power of 2, then the allocated size will be the next highest
     * power of 2.
     *
     * @return the number of slots in the Submission Q
     */
    public int sqsize() {
        return sq.ringSize;
    }

    /**
     * Returns the number of free entries in the Submission Q
     *
     * @return the ring size minus the number of entries currently queued
     */
    public int sqfree() {
        return sq.nUsed();
    }

    /**
     * Returns whether the completion Q is empty or not.
     *
     * @return {@code true} if the Completion Q currently holds no entries
     */
    public boolean cqempty() {
        return cq.nEntries() == 0;
    }

    /**
     * Returns the allocated size of the Completion Q.
     * Currently, double the size of the Submission Q
     *
     * @return the number of slots in the Completion Q
     */
    public int cqsize() {
        return cq.ringSize;
    }

    /**
     * Returns the epoll file descriptor associated with this ring.
     *
     * @return the epoll fd, or {@code -1} if no epoll instance has been
     *         created yet
     */
    public int epoll_fd() {
        return epollfd;
    }

    /**
     * Polls the Completion Queue for results.
     *
     * @return a Cqe if available or {@code null}
     */
    public Cqe pollCompletion() {
        Cqe cqe = cq.pollHead();
        if (TRACE)
            System.out.printf("pollCompletion: -> %s\n", cqe);
        return cqe;
    }

    /**
     * Returns a String description of the given errno value
     *
     * @param errno the (positive) errno value
     * @return the description produced by {@code Util.strerror}
     */
    public static String strerror(int errno) {
        return Util.strerror(errno);
    }

    // Invokes io_uring_setup through syscall(2).
    private static int io_uring_setup(int entries, MemorySegment params)
            throws IOException {
        try {
            return (int) setup_fn.invokeExact(NR_io_uring_setup,
                    entries, params);
        } catch (Throwable t) {
            throw ioexception(t);
        }
    }

    // Invokes io_uring_enter through syscall(2); the sigset argument is NULL.
    private static int io_uring_enter(int fd, int to_submit, int min_complete,
                                      int flags) throws IOException {
        try {
            return (int) enter_fn.invokeExact(NR_io_uring_enter,
                    fd, to_submit, min_complete, flags, MemorySegment.NULL);
        } catch (Throwable t) {
            throw ioexception(t);
        }
    }

    /**
     * Converts a Throwable to an IOException: an IOException is returned
     * as is, anything else is wrapped as the cause of a new IOException.
     */
    static IOException ioexception(Throwable t) {
        if (t instanceof IOException ioe) {
            return ioe;
        } else {
            return new IOException(t);
        }
    }

    // Delegates to the mapped buffer pool to validate the buffer and
    // look up its index.
    int checkAndGetIndexFor(ByteBuffer buffer) {
        return mappedBuffers.checkAndGetIndexForBuffer(buffer);
    }

    /**
     * Returns a mapped direct ByteBuffer or {@code null} if none available.
     * Mapped buffers must be used with some IOUringImpl operations such as
     * {@code IORING_OP_WRITE_FIXED} and {@code IORING_OP_READ_FIXED}.
     * Buffers must be returned after use with
     * {@link #returnRegisteredBuffer(ByteBuffer)}.
     *
     * @return a registered buffer from the pool, or {@code null}
     */
    public ByteBuffer getRegisteredBuffer() {
        return mappedBuffers.getRegisteredBuffer();
    }

    /**
     * Returns a previously allocated registered buffer.
     *
     * @param buffer a buffer obtained from {@link #getRegisteredBuffer()}
     */
    public void returnRegisteredBuffer(ByteBuffer buffer) {
        mappedBuffers.returnRegisteredBuffer(buffer);
    }

    /**
     * Common capabilities of SubmissionQueue and CompletionQueue
     */
    sealed class QueueImplBase permits SubmissionQueue, CompletionQueue {
        protected final MemorySegment ringSeg;
        private final MemorySegment head, tail;
        protected final int ringMask;
        protected final MemoryLayout ringLayout;
        protected final int ringLayoutSize;
        protected final int ringLayoutAlignment;
        protected final int ringSize;

        // For accessing head and tail as volatile
        protected final VarHandle addrH;

        /**
         *
         * @param ringSeg The mapped segment
         * @param head The head pointer
         * @param tail The tail pointer
         * @param ringMask mask applied to head/tail values to obtain a ring
         *                 index; the ring size is {@code ringMask + 1}
         * @param ringLayout layout of one element of the ring
         */
        QueueImplBase(MemorySegment ringSeg, MemorySegment head,
                      MemorySegment tail, int ringMask,
                      MemoryLayout ringLayout) {
            this.ringSeg = ringSeg;
            this.head = head;
            this.tail = tail;
            this.ringMask = ringMask;
            this.ringSize = ringMask + 1;
            this.ringLayout = ringLayout;
            this.ringLayoutSize = (int)ringLayout.byteSize();
            this.ringLayoutAlignment = (int)ringLayout.byteAlignment();
            this.addrH = ValueLayout.JAVA_INT.varHandle();
        }

        // Number of entries currently in the ring (distance between
        // tail and head, both read with plain access).
        int nEntries() {
            int n = Math.abs(getTail(false) - getHead(false));
            return n;
        }

        boolean ringFull() {
            return nEntries() == ringSize;
        }

        // NOTE: despite its name this returns the number of slots that are
        // NOT in use (ringSize - nEntries()); sqfree() relies on that.
        int nUsed() {
            return ringSize - nEntries();
        }

        protected int getHead(boolean withAcquire) {
            return withAcquire
                    ? (int) addrH.getAcquire(head, 0L)
                    : (int) addrH.get(head, 0L);
        }

        protected int getTail(boolean withAcquire) {
            return withAcquire
                    ? (int) addrH.getAcquire(tail, 0L)
                    : (int) addrH.get(tail, 0L);
        }

        // Used by CompletionQueue
        protected void setHead(int val) {
            addrH.setRelease(head, 0L, val);
        }

        // Used by SubmissionQueue
        protected void setTail(int val) {
            addrH.setRelease(tail, 0L, val);
        }
    }

    final class SubmissionQueue extends QueueImplBase {
        final MemorySegment sqes;
        final int n_sqes;
        static final int sqe_layout_size =
                (int)io_uring_sqe.$LAYOUT().byteSize();

        static final int sqe_alignment =
                (int)io_uring_sqe.$LAYOUT().byteAlignment();

        SubmissionQueue(MemorySegment ringSeg, MemorySegment head,
                        MemorySegment tail, int mask, MemorySegment sqes) {
            super(ringSeg, head, tail, mask, ValueLayout.JAVA_INT);
            this.sqes = sqes;
            this.n_sqes = (int) (sqes.byteSize() / sqe_layout_size);
        }

        /**
         * Submits an Sqe to Submission Q.
         * @param sqe the entry to copy into the next free slot
         * @throws IOException if Q full
         */
        public void submit(Sqe sqe) throws IOException {
            if (ringFull()) {
                throw new IOException("Submission Queue full");
            }

            int tailVal = getTail(false);
            int tailIndex = tailVal & ringMask;

            // Zeroed view of the io_uring_sqe at the tail position
            MemorySegment slot = sqes.asSlice(
                    (long) tailIndex * sqe_layout_size,
                    sqe_layout_size, sqe_alignment).fill((byte)0);

            // Populate the slot as an io_uring_sqe
            // Note. Sqe has already validated that overlapping fields not set
            io_uring_sqe.user_data(slot, sqe.user_data());
            io_uring_sqe.fd(slot, sqe.fd());
            io_uring_sqe.opcode(slot, (byte)sqe.opcode());
            // This statement handles the large flags union
            // For simplicity all __u32 variants are handled
            // as xxx_flags. poll_events (__u16) are special
            sqe.xxx_flags().ifPresentOrElse(
                    u32 -> io_uring_sqe.open_flags(slot, u32),
                    // xxx_flags not present, poll_events may be
                    () -> sqe.poll_events().ifPresent(
                            u16 -> io_uring_sqe.poll_events(slot, (short)u16)));

            io_uring_sqe.flags(slot, (byte)sqe.flags());
            io_uring_sqe.addr(slot, sqe.addr()
                    .orElse(MemorySegment.NULL).address());
            io_uring_sqe.addr2(slot, sqe.addr2()
                    .orElse(MemorySegment.NULL).address());
            io_uring_sqe.buf_index(slot, (short)sqe.buf_index().orElse(0));
            io_uring_sqe.off(slot, sqe.off().orElse(0L));
            io_uring_sqe.len(slot, sqe.len().orElse(0));
            // Populate the tail slot
            ringSeg.setAtIndex(ValueLayout.JAVA_INT, tailIndex, tailIndex);
            // Publish the new tail with release semantics
            setTail(++tailVal);
        }
    }

    final class CompletionQueue extends QueueImplBase {
        CompletionQueue(MemorySegment ringSeg, MemorySegment head,
                        MemorySegment tail, int mask) {
            super(ringSeg, head, tail, mask, io_uring_cqe.$LAYOUT());
        }

        /**
         * Removes and returns the entry at the head of the Completion Q.
         *
         * @return a Cqe copied from the head entry, or {@code null} if
         *         head equals tail (nothing to consume)
         */
        public Cqe pollHead() {
            int headVal = getHead(false);
            Cqe res = null;
            if (headVal != getTail(true)) {
                int index = headVal & ringMask;
                int offset = index * ringLayoutSize;
                MemorySegment seg = ringSeg.asSlice(offset,
                        ringLayoutSize, ringLayoutAlignment);
                res = new Cqe(
                        io_uring_cqe.user_data(seg),
                        io_uring_cqe.res(seg),
                        io_uring_cqe.flags(seg));
                headVal++;
            }
            // Head is written back even when nothing was consumed
            setHead(headVal);
            return res;
        }
    }

    /**
     * Adds the given fd to this ring's epoll(7) instance
     * and creates the epoll instance if it hasn't already been created
     *
     * If using the EPOLLONESHOT mode (in flags) the opaque field
     * can be used to return the "id" of the specific operation that was
     * kicked off.
     *
     * @param fd target fd to manage
     * @param poll_events bit mask of events to activate
     * @param opaque a 64 bit value to return with event notifications.
     *               A value of -1L is ignored.
     * @throws IOException if the epoll instance cannot be created or the
     *                     Submission Q is full
     * @throws InterruptedException declared for callers; not thrown by the
     *                              code visible in this class
     */
    public void epoll_add(int fd, int poll_events, long opaque)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, opaque, EPOLL_CTL_ADD());
    }

    /**
     * Submits a request removing the given fd from this ring's epoll(7)
     * instance.
     *
     * @param fd target fd to remove
     * @param poll_events bit mask of events, passed through to the request
     * @throws IOException if the epoll instance cannot be created or the
     *                     Submission Q is full
     * @throws InterruptedException declared for callers; not thrown by the
     *                              code visible in this class
     */
    public void epoll_del(int fd, int poll_events)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, -1L, EPOLL_CTL_DEL());
    }

    /**
     * Submits a request modifying the registration of the given fd in this
     * ring's epoll(7) instance.
     *
     * @param fd target fd to modify
     * @param poll_events new bit mask of events to activate
     * @param opaque a 64 bit value to return with event notifications.
     *               A value of -1L is ignored.
     * @throws IOException if the epoll instance cannot be created or the
     *                     Submission Q is full
     * @throws InterruptedException declared for callers; not thrown by the
     *                              code visible in this class
     */
    public void epoll_mod(int fd, int poll_events, long opaque)
            throws IOException, InterruptedException {
        // Was EPOLL_CTL_DEL, which removed the fd instead of modifying it.
        epoll_op(fd, poll_events, opaque, EPOLL_CTL_MOD());
    }

    // Builds and submits an IORING_OP_EPOLL_CTL request, lazily creating
    // the epoll instance on first use.
    //
    // NOTE(review): liburing's io_uring_prep_epoll_ctl puts the pointer to
    // the epoll_event in sqe.addr and the target fd in sqe.off. Here addr
    // points at an int holding the fd and off carries the event address.
    // Confirm against the kernel ABI before relying on this path.
    private void epoll_op(int fd, int poll_events, long opaque, int op)
            throws IOException, InterruptedException {
        if (this.epollfd == -1) {
            this.epollfd = initEpoll();
        }

        MemorySegment targetfd =
                arena.allocateFrom(ValueLayout.OfInt.JAVA_INT, fd);

        Sqe request = new Sqe()
                .opcode(IORING_OP_EPOLL_CTL())
                .fd(epollfd)
                .addr(targetfd)
                .xxx_flags(poll_events)
                .len(op);

        if (opaque != -1L) {
            MemorySegment event = arena.allocate(epoll_event.$LAYOUT());
            epoll_event.events(event, poll_events);
            var dataSlice = epoll_event.data(event);
            epoll_data_t.u64(dataSlice, opaque);
            request = request.off(event.address());
        }
        submit(request);
    }

    /**
     * Allocates a zero-filled native segment sized and aligned for the
     * given layout.
     */
    static MemorySegment getSegmentFor(MemoryLayout layout) {
        return arena.allocate(layout.byteSize(), layout.byteAlignment())
                .fill((byte)0);
    }

    /**
     * Returns an error message for a negated errno value
     * (the argument is negated before being passed to strerror).
     */
    static String errorString(int errno) {
        errno = -errno;
        return "Error: " + strerror(errno);
    }

    // This is obsolete. There is a better way of doing a timed
    // poll by providing a timeval to io_uring_enter
    public Sqe getTimeoutSqe(Duration maxwait, int opcode, int completionCount) {
        MemorySegment seg =
                arena.allocate(__kernel_timespec.$LAYOUT()).fill((byte)(0));

        __kernel_timespec.tv_sec(seg, maxwait.getSeconds());
        __kernel_timespec.tv_nsec(seg, maxwait.getNano());
        return new Sqe()
                .opcode(opcode)
                .addr(seg)
                .xxx_flags(0) // timeout_flags
                .off(completionCount)
                .len(1);
    }

    // Address layout whose target is an (effectively) unbounded byte
    // sequence; used for the pointer returned by mmap.
    private final static ValueLayout POINTER =
            ValueLayout.ADDRESS.withTargetLayout(
                    MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)
            );

    private static final MethodHandle mmap_fn = locateStdHandle(
            "mmap", FunctionDescriptor.of(
                    POINTER,               // returned address
                    ValueLayout.JAVA_LONG, // input address, usually zero
                    ValueLayout.JAVA_LONG, // size_t
                    ValueLayout.JAVA_INT,  // int prot (PROT_READ | PROT_WRITE)
                    ValueLayout.JAVA_INT,  // int flags (MAP_SHARED|MAP_POPULATE)
                    ValueLayout.JAVA_INT,  // int fd
                    ValueLayout.JAVA_LONG  // off_t (64bit?)
            )
    );

    private static final MethodHandle epoll_create_fn = locateStdHandle(
            "epoll_create", FunctionDescriptor.of(
                    ValueLayout.JAVA_INT, // returned fd
                    ValueLayout.JAVA_INT  // int size (ignored)
            ), SystemCallContext.errnoLinkerOption()
    );

    private static final MethodHandle close_fn = locateStdHandle(
            "close",
            FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT),
            SystemCallContext.errnoLinkerOption()
    );

    private static final MethodHandle eventfd_fn = locateStdHandle(
            "eventfd",
            FunctionDescriptor.of(
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT),
            SystemCallContext.errnoLinkerOption()
    );

    // Linux syscall numbers. Allows to invoke the system call
    // directly in systems where there are no wrappers
    // for these functions in libc or liburing.
    // Also means we no longer use liburing

    private static final int NR_io_uring_setup = 425;
    private static final int NR_io_uring_enter = 426;
    private static final int NR_io_uring_register = 427;

    private static final MethodHandle setup_fn = locateStdHandle(
            "syscall", FunctionDescriptor.of(
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.ADDRESS)
    );

    private static final MethodHandle enter_fn = locateStdHandle(
            "syscall", FunctionDescriptor.of(ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.JAVA_INT,
                    ValueLayout.ADDRESS) // sigset_t UNUSED for now
    );

    // io_uring_register specifically for
    // IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD
    private static final MethodHandle evregister_fn = locateStdHandle(
            "syscall",
            FunctionDescriptor.of(ValueLayout.JAVA_INT, // result
                    ValueLayout.JAVA_INT, // syscall
                    ValueLayout.JAVA_INT, // ring fd
                    ValueLayout.JAVA_INT, // opcode
                    INT_POINTER,          // pointer to fd
                    ValueLayout.JAVA_INT),// integer value 1
            SystemCallContext.errnoLinkerOption()
    );

    // mmap constants used internally
    private static final int PROT_READ = 1;
    private static final int PROT_WRITE = 2;
    private static final int MAP_SHARED = 1;
    private static final int MAP_POPULATE = 0x8000;
    // Value returned by mmap(2) on failure: (void *) -1
    private static final long MAP_FAILED = -1L;

    /**
     * Maps a region of the ring into memory. The mapping is created
     * readable and writable with {@code MAP_SHARED | MAP_POPULATE}.
     * <p>
     * offset (when mapping IOURING segments) must be one of:
     *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQ_RING()
     *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_CQ_RING()
     *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQES()
     *
     * @param size number of bytes to map
     * @param fd the ring file descriptor
     * @param offset one of the offsets listed above
     * @return a segment of {@code size} bytes covering the mapping
     * @throws IOException if mmap reports failure ({@code MAP_FAILED})
     */
    private static MemorySegment mmap(long size, int fd, long offset)
            throws IOException {
        MemorySegment seg;
        try {
            seg = (MemorySegment)mmap_fn
                    .invokeExact(0L, size,
                            PROT_READ | PROT_WRITE,
                            MAP_SHARED | MAP_POPULATE,
                            fd,
                            offset
                    );
        } catch (Throwable e) {
            throw new RuntimeException(e);
        }
        // mmap signals failure by returning MAP_FAILED rather than NULL;
        // never hand such an address back as a usable segment.
        if (seg.address() == MAP_FAILED) {
            throw new IOException("mmap failed: size=" + size
                    + ", offset=0x" + Long.toHexString(offset));
        }
        return seg.reinterpret(size);
    }

    // Returns the io_uring file descriptor.
    int ringFd() {
        return fd;
    }
}