1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 package sun.nio.ch.iouring;
 27 
 28 import jdk.internal.ffi.generated.iouring.*;
 29 
 30 import java.io.IOException;
 31 import java.lang.foreign.*;
 32 import java.lang.invoke.MethodHandle;
 33 import java.lang.invoke.VarHandle;
 34 import java.nio.ByteBuffer;
 35 import java.time.Duration;
 36 
 37 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
 38 import static sun.nio.ch.iouring.Util.strerror;
 39 import static sun.nio.ch.iouring.Util.locateHandleFromLib;
 40 import static sun.nio.ch.iouring.Util.locateStdHandle;
 41 import static sun.nio.ch.iouring.Util.INT_POINTER;
 42 import static jdk.internal.ffi.generated.iouring.iouring_h.*;
 43 import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_REGISTER_EVENTFD;
 44 import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_UNREGISTER_EVENTFD;
 45 
 46 /**
 47  * Low level interface to a Linux io_uring. It provides an asynchronous
 48  * interface. Requests are submitted through the {@link #submit(Sqe)} method.
 49  * Completion events can be awaited by calling {@link #enter(int, int, int)}.
 50  * Completions represented by {@link Cqe} are then obtained by calling
 51  * {@link #pollCompletion()}. Completions are linked to submissions by the
 52  * {@link Cqe#user_data()} field of the {@code Cqe} which contains the
 53  * same 64-bit (long) value that was supplied in the submitted {@link Sqe}.
 54  * <p>
 55  * Some IOUringImpl operations work with kernel registered direct ByteBuffers.
 56  * When creating an IOUringImpl instance, a number of these buffers can be
 57  * created in a pool. Registered buffers are not used with regular
 58  * IOUringImpl read/write operations.
 59  */
 60 @SuppressWarnings("restricted")
 61 public class IOUringImpl {
    // Shared automatic arena used by the static helpers and per-call
    // allocations in this class (e.g. getSegmentFor, getTimeoutSqe).
    // NOTE(review): the field is static, so the arena stays reachable for the
    // lifetime of the class and segments allocated from it are presumably
    // never reclaimed — confirm this is intended.
    private static final Arena arena = Arena.ofAuto();

    // Diagnostic tracing to System.out; enabled when the system property
    // sun.nio.ch.iouring.trace equals "true" (case-insensitive).
    private static final boolean TRACE = System
            .getProperty("sun.nio.ch.iouring.trace", "false")
            .equalsIgnoreCase("true");
    // Views over the mmap'ed submission and completion rings; both are
    // created by the constructor.
    private final SubmissionQueue sq;
    private final CompletionQueue cq;
    private final int fd;               // The ringfd
    private int epollfd = -1;           // The epoll(7) if set
    // Size in bytes of a Java int; used to size the SQ index array.
    private static final int INT_SIZE = (int)ValueLayout.JAVA_INT.byteSize();

    // NOTE(review): this per-instance arena is not referenced anywhere in the
    // class as shown; all allocations go through the static arena above.
    private final Arena autoArena = Arena.ofAuto();

    // Pool of kernel-registered buffers; created by the constructor and
    // registered with the ring only when the requested count is > 0.
    private final KMappedBuffers mappedBuffers;
 76 
 77     /**
 78      * Creates an IOURing and initializes the ring structures. {@code entries}
 79      * (or the next higher power of 2) is the size of the Submission Queue.
 80      * Currently, the completion queue returned will be double the size
 81      * of the Submission queue.
 82      */
 83     public IOUringImpl(int entries) throws IOException {
 84         this(entries, 0, 0);
 85     }
 86 
 87     /**
 88      * Creates an IOURing and initializes the ring structures.
 89      * @param sq_entries the number of submission queue entries to allocate
 90      * @param cq_entries the number of completion queue entries to allocate
 91      * @param flags io_uring_params flags
 92      * @throws IOException if an IOException occurs
 93      */
 94     public IOUringImpl(int sq_entries, int cq_entries, int flags) throws IOException {
 95         this(sq_entries, cq_entries, 0, 0, -1);
 96     }
 97 
 98     /**
 99      * Creates an IOURing initializes the ring structures and allocates a
100      * number of direct {@link ByteBuffer}s which are additionally mapped
101      * into the kernel address space.
102      *
103      * @param sq_entries the number of submission queue entries to allocate
104      * @param cq_entries the number of completion queue entries to allocate
105      * @param flags io_uring_params flags
106      * @param nmappedBuffers number of mapped direct ByteBuffers to create
107      * @param mappedBufsize size of each buffer in bytes
108      * @throws IOException if an IOException occurs
109      */
110     public IOUringImpl(int sq_entries,
111                        int cq_entries,
112                        int flags,
113                        int nmappedBuffers,
114                        int mappedBufsize) throws IOException {
115         MemorySegment params_seg = getSegmentFor(io_uring_params.$LAYOUT());
116 
117         if (cq_entries > 0) {
118             io_uring_params.cq_entries(params_seg, cq_entries);
119             flags |= IORING_SETUP_CQSIZE();
120         }
121 
122         if (flags != 0) {
123             io_uring_params.flags(params_seg, flags);
124         }
125 
126         // call setup
127         fd = io_uring_setup(sq_entries, params_seg);
128         if (fd < 0) {
129             throw new IOException(errorString(fd));
130         }
131 
132         mappedBuffers = new KMappedBuffers(nmappedBuffers, mappedBufsize);
133         if (nmappedBuffers > 0) {
134             mappedBuffers.register(fd);
135         }
136         // Offsets segments
137         MemorySegment cq_off_seg = io_uring_params.cq_off(params_seg);
138         MemorySegment sq_off_seg = io_uring_params.sq_off(params_seg);
139 
140         // Offsets to cqe array and the sqe index array
141         int cq_off_cqes = io_cqring_offsets.cqes(cq_off_seg);
142         int sq_off_array = io_sqring_offsets.array(sq_off_seg);
143 
144         // Acual number of entries in each Q
145         sq_entries = io_uring_params.sq_entries(params_seg);
146         cq_entries = io_uring_params.cq_entries(params_seg);
147 
148         int sq_size = sq_off_array + sq_entries * INT_SIZE;
149         int cq_size = cq_off_cqes + cq_entries * (int)io_uring_cqe.sizeof();
150 
151         boolean singleMmap = (io_uring_params.features(params_seg)
152                 & IORING_FEAT_SINGLE_MMAP()) != 0;
153 
154         if (singleMmap) {
155             if (cq_size > sq_size)
156                 sq_size = cq_size;
157             cq_size = sq_size;
158         }
159         var sqe_seg = mmap(sq_size, fd, IORING_OFF_SQ_RING());
160 
161         MemorySegment cqes_seg;
162         if (singleMmap) {
163             cqes_seg = sqe_seg;
164         } else {
165             cqes_seg = mmap(cq_size, fd, IORING_OFF_CQ_RING());
166         }
167 
168         // Masks
169         int sq_mask = sqe_seg.get(ValueLayout.JAVA_INT,
170                                   io_sqring_offsets.ring_mask(sq_off_seg));
171         int cq_mask = cqes_seg.get(ValueLayout.JAVA_INT,
172                                    io_cqring_offsets.ring_mask(cq_off_seg));
173 
174         var sqes = mmap(sq_entries * io_uring_sqe.sizeof(),
175                         fd, IORING_OFF_SQES());
176 
177         cq = new CompletionQueue(cqes_seg.asSlice(cq_off_cqes),
178                 cqes_seg.asSlice(io_cqring_offsets.head(cq_off_seg)),
179                 cqes_seg.asSlice(io_cqring_offsets.tail(cq_off_seg)),
180                 cq_mask);
181 
182         sq = new SubmissionQueue(sqe_seg.asSlice(sq_off_array),
183                 cqes_seg.asSlice(io_cqring_offsets.head(sq_off_seg)),
184                 cqes_seg.asSlice(io_cqring_offsets.tail(sq_off_seg)),
185                 sq_mask,
186                 sqes);
187         if (TRACE)
188             System.out.printf("IOUringImpl: ringfd: %d\n", fd);
189     }
190 
191 
192     public void close() throws IOException {
193         int ret;
194         SystemCallContext ctx = SystemCallContext.get();
195         try {
196             ret = (int)close_fn.invokeExact(ctx.errnoCaptureSegment(),
197                                             ringFd());
198         } catch (Throwable e) {
199             throw new RuntimeException(e);
200         }
201         ctx.throwIOExceptionOnError(ret);
202 
203     }
204 
205     public int eventfd() throws IOException {
206         int ret;
207         SystemCallContext ctx = SystemCallContext.get();
208         try {
209             ret = (int)eventfd_fn.invokeExact(ctx.errnoCaptureSegment(),
210                                             0, 0);
211         } catch (Throwable e) {
212             throw new RuntimeException(e);
213         }
214         ctx.throwIOExceptionOnError(ret);
215         return ret;
216     }
217 
218     private int initEpoll() throws IOException {
219         int ret;
220         SystemCallContext ctx = SystemCallContext.get();
221         try {
222             ret = (int)epoll_create_fn.invokeExact(ctx.errnoCaptureSegment(),
223                                                    ringFd(), 1);
224         } catch (Throwable e) {
225             throw new RuntimeException(e);
226         }
227         ctx.throwIOExceptionOnError(ret);
228         return ret;
229     }
230 
231     public void register_eventfd(int efd) throws IOException {
232         int ret;
233         SystemCallContext ctx = SystemCallContext.get();
234         MemorySegment fdseg =
235             arena.allocateFrom(ValueLayout.JAVA_INT, efd);
236 
237         try {
238             ret = (int)evregister_fn
239                     .invokeExact(
240                             ctx.errnoCaptureSegment(),
241                             NR_io_uring_register,
242                             fd, IORING_REGISTER_EVENTFD(),
243                             fdseg, 1
244                     );
245         } catch (Throwable e) {
246             throw new RuntimeException(e);
247         }
248         ctx.throwIOExceptionOnError(ret);
249     }
250 
251     public void unregister_eventfd() throws IOException {
252         int ret;
253         SystemCallContext ctx = SystemCallContext.get();
254 
255         try {
256             ret = (int)evregister_fn
257                     .invokeExact(
258                             ctx.errnoCaptureSegment(),
259                             NR_io_uring_register,
260                             fd, IORING_UNREGISTER_EVENTFD(),
261                             MemorySegment.NULL, 0
262                     );
263         } catch (Throwable e) {
264             throw new RuntimeException(e);
265         }
266         ctx.throwIOExceptionOnError(ret);
267 
268     }
269 
270     /**
271      * Asynchronously submits an Sqe to this IOUringImpl. Can be called
272      * multiple times before enter().
273      *
274      * @param sqe
275      * @throws IOException if submission q full
276      */
277     public void submit(Sqe sqe) throws IOException {
278         sq.submit(sqe);
279         if (TRACE)
280             System.out.printf("submit: %s \n", sqe);
281     }
282 
283     /**
284      * Notifies the kernel of entries on the Submission Q and waits for a
285      * number of responses (completion events). If this returns normally
286      * with value {@code n > 0}, this means that n requests have been accepted
287      * by the kernel. A normal return also means that the requested number of
288      * completion events have been received {@link #pollCompletion()} can be
289      * called {@code nreceive} times to obtain the results.
290      *
291      * @param nsubmit number of requests to submit
292      * @param nreceive block until this number of events received
293      * @param flags flags to pass to io_uring_enter
294      *
295      * @return if return value less than 0 means an error occurred. Otherwise,
296      *         the number of Sqes successfully submitted.
297      */
298     public int enter(int nsubmit, int nreceive, int flags) throws IOException {
299         if (nreceive > 0) {
300             flags |= IORING_ENTER_GETEVENTS();
301         }
302         return io_uring_enter(this.fd, nsubmit, nreceive, flags);
303     }
304 
305     /**
306      * Returns the allocated size of the Submission Q. If the requested size
307      * was not a power of 2, then the allocated size will be the next highest
308      * power of 2.
309      *
310      * @return
311      */
312     public int sqsize() {
313         return sq.ringSize;
314     }
315 
316     /**
317      * Returns the number of free entries in the Submission Q
318      */
319     public int sqfree() {
320         return sq.nUsed();
321     }
322 
323     /**
324      * Returns whether the completion Q is empty or not.
325      *
326      * @return
327      */
328     public boolean cqempty() {
329         return cq.nEntries() == 0;
330     }
331 
332     /**
333      * Returns the allocated size of the Completion Q.
334      * Currently, double the size of the Submission Q
335      *
336      * @return
337      */
338     public int cqsize() {
339         return cq.ringSize;
340     }
341 
342     public int epoll_fd() {
343         return epollfd;
344     }
345 
346     /**
347      * Polls the Completion Queue for results.
348      *
349      * @return a Cqe if available or {@code null}
350      */
351     public Cqe pollCompletion() {
352         Cqe cqe = cq.pollHead();
353         if (TRACE)
354             System.out.printf("pollCompletion: -> %s\n", cqe);
355         return cqe;
356     }
357 
358     /**
359      * Returns a String description of the given errno value
360      *
361      * @param errno
362      * @return
363      */
364     public static String strerror(int errno) {
365         return Util.strerror(errno);
366     }
367 
368     private static int io_uring_setup(int entries, MemorySegment params)
369             throws IOException {
370         try {
371             return (int) setup_fn.invokeExact(NR_io_uring_setup,
372                                               entries, params);
373         } catch (Throwable t) {
374             throw ioexception(t);
375         }
376     }
377 
378     private static int io_uring_enter(int fd, int to_submit, int min_complete,
379                                       int flags) throws IOException {
380         try {
381             return (int) enter_fn.invokeExact(NR_io_uring_enter,
382                     fd, to_submit, min_complete, flags, MemorySegment.NULL);
383         } catch (Throwable t) {
384             throw ioexception(t);
385         }
386     }
387 
388     static IOException ioexception(Throwable t) {
389         if (t instanceof IOException ioe) {
390             return ioe;
391         } else {
392             return new IOException(t);
393         }
394     }
395 
396     int checkAndGetIndexFor(ByteBuffer buffer) {
397         return mappedBuffers.checkAndGetIndexForBuffer(buffer);
398     }
399 
400     /**
401      * Returns a mapped direct ByteBuffer or {@code null} if none available.
402      * Mapped buffers must be used with some IOUringImpl operations such as
403      * {@code IORING_OP_WRITE_FIXED} and {@code IORING_OP_READ_FIXED}.
404      * Buffers must be returned after use with
405      * {@link #returnRegisteredBuffer(ByteBuffer)}.
406      *
407      * @return
408      */
409     public ByteBuffer getRegisteredBuffer() {
410         return mappedBuffers.getRegisteredBuffer();
411     }
412 
413     /**
414      * Returns a previously allocated registered buffer.
415      *
416      * @param buffer
417      */
418     public void returnRegisteredBuffer(ByteBuffer buffer) {
419         mappedBuffers.returnRegisteredBuffer(buffer);
420     }
421 
422     /**
423      * Common capabilities of SubmissionQueue and CompletionQueue
424      */
425     sealed class QueueImplBase permits SubmissionQueue, CompletionQueue {
426         protected final MemorySegment ringSeg;
427         private final MemorySegment head, tail;
428         protected final int ringMask;
429         protected final MemoryLayout ringLayout;
430         protected final int ringLayoutSize;
431         protected final int ringLayoutAlignment;
432         protected final int ringSize;
433 
434         // For accessing head and tail as volatile
435         protected final VarHandle addrH;
436 
437         /**
438          *
439          * @param ringSeg The mapped segment
440          * @param head The head pointer
441          * @param tail The tail pointer
442          * @param ringMask
443          * @param ringLayout
444          */
445         QueueImplBase(MemorySegment ringSeg, MemorySegment head,
446                       MemorySegment tail, int ringMask,
447                       MemoryLayout ringLayout) {
448             this.ringSeg = ringSeg;
449             this.head = head;
450             this.tail = tail;
451             this.ringMask = ringMask;
452             this.ringSize = ringMask + 1;
453             this.ringLayout = ringLayout;
454             this.ringLayoutSize = (int)ringLayout.byteSize();
455             this.ringLayoutAlignment = (int)ringLayout.byteAlignment();
456             this.addrH = ValueLayout.JAVA_INT.varHandle();
457         }
458 
459         int nEntries() {
460             int n = Math.abs(getTail(false) - getHead(false));
461             return n;
462         }
463 
464         boolean ringFull() {
465             return nEntries() == ringSize;
466         }
467 
468         int nUsed() {
469             return ringSize - nEntries();
470         }
471         protected int getHead(boolean withAcquire) {
472             int val = (int)(withAcquire
473                 ? addrH.getAcquire(head, 0) : addrH.get(head, 0));
474             return val;
475         }
476 
477         protected int getTail(boolean withAcquire) {
478             int val = (int)(withAcquire
479                 ? addrH.getAcquire(tail, 0L) : addrH.get(tail, 0L));
480             return val;
481         }
482 
483         // Used by CompletionQueue
484         protected void setHead(int val) {
485             addrH.setRelease(head, 0L, val);
486         }
487 
488         // Used by SubmissionQueue
489         protected void setTail(int val) {
490             addrH.setRelease(tail, 0L, val);
491         }
492     }
493 
494     final class SubmissionQueue extends QueueImplBase {
495         final MemorySegment sqes;
496         final int n_sqes;
497         static final int sqe_layout_size =
498             (int)io_uring_sqe.$LAYOUT().byteSize();
499 
500         static final int sqe_alignment =
501             (int)io_uring_sqe.$LAYOUT().byteAlignment();
502 
503         SubmissionQueue(MemorySegment ringSeg, MemorySegment head,
504                         MemorySegment tail, int mask, MemorySegment sqes) {
505             super(ringSeg, head, tail, mask, ValueLayout.JAVA_INT);
506             this.sqes = sqes;
507             this.n_sqes = (int) (sqes.byteSize() / sqe_layout_size);
508         }
509 
510         /**
511          * Submits an Sqe to Submission Q.
512          * @param sqe
513          * @throws IOException if Q full
514          */
515         public void submit(Sqe sqe) throws IOException {
516             if (ringFull()) {
517                 throw new IOException("Submission Queue full");
518             }
519 
520             int tailVal = getTail(false);
521             int tailIndex = tailVal & ringMask;
522 
523             MemorySegment slot = sqes.asSlice(
524                     (long) tailIndex * sqe_layout_size,
525                     sqe_layout_size, sqe_alignment).fill((byte)0);
526             if (slot == null)
527                 throw new IOException("Q full"); // shouldn't happen
528             // Populate the slot as an io_uring_sqe
529             // Note. Sqe has already validated that overlapping fields not set
530             io_uring_sqe.user_data(slot, sqe.user_data());
531             io_uring_sqe.fd(slot, sqe.fd());
532             io_uring_sqe.opcode(slot, (byte)sqe.opcode());
533             // This statement handles the large flags union
534             // For simplicity all __u32 variants are handled
535             // as xxx_flags. poll_events (__u16) are special
536             sqe.xxx_flags().ifPresentOrElse(
537                 u32 -> io_uring_sqe.open_flags(slot, u32),
538                 // xxx_flags not present, poll_events may be
539                 () -> sqe.poll_events().ifPresent(
540                     u16 -> io_uring_sqe.poll_events(slot, (short)u16)));
541 
542             io_uring_sqe.flags(slot, (byte)sqe.flags());
543             io_uring_sqe.addr(slot, sqe.addr()
544                         .orElse(MemorySegment.NULL).address());
545             io_uring_sqe.addr2(slot, sqe.addr2()
546                         .orElse(MemorySegment.NULL).address());
547             io_uring_sqe.buf_index(slot, (short)sqe.buf_index().orElse(0));
548             io_uring_sqe.off(slot, sqe.off().orElse(0L));
549             io_uring_sqe.len(slot, sqe.len().orElse(0));
550             // Populate the tail slot
551             ringSeg.setAtIndex(ValueLayout.JAVA_INT, tailIndex, tailIndex);
552             //Util.print(slot, "SQE");
553             setTail(++tailVal);
554         }
555     }
556 
557     final class CompletionQueue extends QueueImplBase {
558         CompletionQueue(MemorySegment ringSeg, MemorySegment head,
559                         MemorySegment tail, int mask) {
560             super(ringSeg, head, tail, mask, io_uring_cqe.$LAYOUT());
561         }
562 
563         public Cqe pollHead() {
564             int headVal = getHead(false);
565             Cqe res = null;
566             if (headVal != getTail(true)) {
567                 int index = headVal & ringMask;
568                 int offset = index * ringLayoutSize;
569                 MemorySegment seg = ringSeg.asSlice(offset,
570                         ringLayoutSize, ringLayoutAlignment);
571                 res = new Cqe(
572                         io_uring_cqe.user_data(seg),
573                         io_uring_cqe.res(seg),
574                         io_uring_cqe.flags(seg));
575                 headVal++;
576             }
577             setHead(headVal);
578             return res;
579         }
580     };
581 
582     /**
583      * Adds the given fd to this ring's epoll(7) instance
584      * and creates the epoll instance if it hasn't already been created
585      *
586      * If using the EPOLLONESHOT mode (in flags) the opaque field
587      * can be used to return the "id" of the specific operation that was
588      * kicked off.
589      *
590      * @param fd target fd to manage
591      * @param poll_events bit mask of events to activate
592      * @param opaque a 64 bit value to return with event notifications.
593      *               A value of -1L is ignored.
594      * @throws IOException
595      * @throws InterruptedException
596      */
597     public void epoll_add(int fd, int poll_events, long opaque)
598             throws IOException, InterruptedException {
599         epoll_op(fd, poll_events, opaque, EPOLL_CTL_ADD());
600     }
601 
602     public void epoll_del(int fd, int poll_events)
603             throws IOException, InterruptedException {
604         epoll_op(fd, poll_events, -1L, EPOLL_CTL_DEL());
605     }
606 
607     public void epoll_mod(int fd, int poll_events, long opaque)
608             throws IOException, InterruptedException {
609         epoll_op(fd, poll_events, opaque, EPOLL_CTL_DEL());
610     }
611 
612     private void epoll_op(int fd, int poll_events, long opaque, int op)
613             throws IOException, InterruptedException {
614         if (this.epollfd == -1) {
615             this.epollfd = initEpoll();
616         }
617 
618         MemorySegment targetfd =
619             arena.allocateFrom(ValueLayout.OfInt.JAVA_INT, fd);
620 
621         Sqe request = new Sqe()
622                 .opcode(IORING_OP_EPOLL_CTL())
623                 .fd(epollfd)
624                 .addr(targetfd)
625                 .xxx_flags(poll_events)
626                 .len(op);
627 
628         if (opaque != -1L) {
629             MemorySegment event = arena.allocate(epoll_event.$LAYOUT());
630             epoll_event.events(event, poll_events);
631             var dataSlice = epoll_event.data(event);
632             epoll_data_t.u64(dataSlice, opaque);
633             request = request.off(event.address());
634         }
635         submit(request);
636     }
637 
638     static MemorySegment getSegmentFor(MemoryLayout layout) {
639         return arena.allocate(layout.byteSize(), layout.byteAlignment())
640                     .fill((byte)0);
641     }
642 
643     static String errorString(int errno) {
644         errno = -errno;
645         return "Error: " + strerror(errno);
646     }
647 
648     // This is obsolete. There is a better way of doing a timed
649     // poll by providing a timeval to io_uring_enter
650     public Sqe getTimeoutSqe(Duration maxwait, int opcode, int completionCount) {
651         MemorySegment seg =
652             arena.allocate(__kernel_timespec.$LAYOUT()).fill((byte)(0));
653 
654         __kernel_timespec.tv_sec(seg, maxwait.getSeconds());
655         __kernel_timespec.tv_nsec(seg, maxwait.getNano());
656         return new Sqe()
657                 .opcode(opcode)
658                 .addr(seg)
659                 .xxx_flags(0)  // timeout_flags
660                 .off(completionCount)
661                 .len(1);
662     }
663 
    // Address layout whose target is a byte sequence of Long.MAX_VALUE
    // elements. Used as the return layout of mmap.
    private final static ValueLayout POINTER =
        ValueLayout.ADDRESS.withTargetLayout(
            MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)
    );

    // void *mmap(void *addr, size_t len, int prot, int flags, int fd, off_t off)
    // NOTE(review): linked without errno capture, so the reason for a failed
    // mapping is not available to the caller.
    private static final MethodHandle mmap_fn = locateStdHandle(
        "mmap", FunctionDescriptor.of(
                POINTER,
                //ValueLayout.JAVA_LONG, // returned address
                ValueLayout.JAVA_LONG, // input address, usually zero
                ValueLayout.JAVA_LONG, // size_t
                ValueLayout.JAVA_INT, // int prot (PROT_READ | PROT_WRITE)
                ValueLayout.JAVA_INT, // int flags (MAP_SHARED|MAP_POPULATE)
                ValueLayout.JAVA_INT, // int fd
                ValueLayout.JAVA_LONG // off_t (64bit?)
        )
    );

    // int epoll_create(int size), with errno capture: the handle takes the
    // capture segment followed by exactly one int.
    private static final MethodHandle epoll_create_fn = locateStdHandle(
        "epoll_create", FunctionDescriptor.of(
                ValueLayout.JAVA_INT, // returned fd
                ValueLayout.JAVA_INT // int size (ignored)
        ), SystemCallContext.errnoLinkerOption()
    );

    // int close(int fd), with errno capture
    private static final MethodHandle close_fn = locateStdHandle(
        "close",
        FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT),
        SystemCallContext.errnoLinkerOption()
    );

    // int eventfd(unsigned int initval, int flags), with errno capture
    private static final MethodHandle eventfd_fn = locateStdHandle(
        "eventfd",
        FunctionDescriptor.of(
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT,
            ValueLayout.JAVA_INT),
        SystemCallContext.errnoLinkerOption()
    );

    // Linux syscall numbers. Allows to invoke the system call
    // directly in systems where there are no wrappers
    // for these functions in libc or liburing.
    // Also means we no longer use liburing

    private static final int NR_io_uring_setup = 425;
    private static final int NR_io_uring_enter = 426;
    private static final int NR_io_uring_register = 427;

    // syscall(NR_io_uring_setup, entries, params)
    // NOTE(review): linked without errno capture; callers treat a negative
    // return as a negated errno — confirm that matches what syscall(2)
    // returns on failure.
    private static final MethodHandle setup_fn = locateStdHandle(
        "syscall", FunctionDescriptor.of(
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.ADDRESS)
    );

    // syscall(NR_io_uring_enter, fd, to_submit, min_complete, flags, sig)
    // Also linked without errno capture (see the note on setup_fn).
    private static final MethodHandle enter_fn = locateStdHandle(
        "syscall", FunctionDescriptor.of(ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.JAVA_INT,
                ValueLayout.ADDRESS) // sigset_t UNUSED for now
    );

    // io_uring_register specifically for
    // IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD
    // (with errno capture)
    private static final MethodHandle evregister_fn = locateStdHandle(
            "syscall",
            FunctionDescriptor.of(ValueLayout.JAVA_INT,  // result
                    ValueLayout.JAVA_INT, // syscall
                    ValueLayout.JAVA_INT, // ring fd
                    ValueLayout.JAVA_INT, // opcode
                    INT_POINTER,          // pointer to fd
                    ValueLayout.JAVA_INT),// integer value 1
            SystemCallContext.errnoLinkerOption()
    );

    // mmap constants used internally (values as defined by Linux)
    private static final int PROT_READ = 1;
    private static final int PROT_WRITE = 2;
    private static final int MAP_SHARED = 1;
    private static final int MAP_POPULATE = 0x8000;
749 
750     /**
751      * offset (when mapping IOURING segments) must be one of:
752      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQ_RING()
753      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_CQ_RING()
754      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQES()
755      *
756      * @param size
757      * @param fd
758      * @param offset
759      * @return
760      */
761     private static MemorySegment mmap(long size, int fd, long offset) {
762         MemorySegment seg = null;
763         try {
764             seg = (MemorySegment)mmap_fn
765                     .invokeExact(0L, size,
766                             PROT_READ | PROT_WRITE,
767                             MAP_SHARED | MAP_POPULATE,
768                             fd,
769                             offset
770                     );
771         } catch (Throwable e) {
772             throw new RuntimeException(e);
773         }
774         long addr = seg.address();
775         return seg.reinterpret(size);
776     }
777 
778     int ringFd() {
779         return fd;
780     }
781 }