1 /*
  2  * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 
 26 package sun.nio.ch.iouring;
 27 
 28 import jdk.internal.ffi.generated.iouring.*;
 29 
 30 import java.io.IOException;
 31 import java.lang.foreign.*;
 32 import java.lang.invoke.MethodHandle;
 33 import java.lang.invoke.VarHandle;
 34 import java.nio.ByteBuffer;
 35 import java.time.Duration;
 36 
 37 import static java.lang.foreign.ValueLayout.JAVA_BYTE;
 38 import static sun.nio.ch.iouring.Util.strerror;
 39 import static sun.nio.ch.iouring.Util.locateHandleFromLib;
 40 import static sun.nio.ch.iouring.Util.locateStdHandle;
 41 import static sun.nio.ch.iouring.Util.INT_POINTER;
 42 import static jdk.internal.ffi.generated.iouring.iouring_h.*;
 43 import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_REGISTER_EVENTFD;
 44 import static jdk.internal.ffi.generated.iouring.iouring_h_1.IORING_UNREGISTER_EVENTFD;
 45 
 46 /**
 47  * Low level interface to a Linux io_uring. It provides an asynchronous
 48  * interface. Requests are submitted through the {@link #submit(Sqe)} method.
 49  * Completion events can be awaited by calling {@link #enter(int, int, int)}.
 50  * Completions represented by {@link Cqe} are then obtained by calling
 51  * {@link #pollCompletion()}. Completions are linked to submissions by the
 52  * {@link Cqe#user_data()} field of the {@code Cqe} which contains the
 53  * same 64-bit (long) value that was supplied in the submitted {@link Sqe}.
 54  * <p>
 55  * Some IOUringImpl operations work with kernel registered direct ByteBuffers.
 56  * When creating an IOUringImpl instance, a number of these buffers can be
 57  * created in a pool. Registered buffers are not used with regular
 58  * IOUringImpl read/write operations.
 59  */
 60 @SuppressWarnings("restricted")
 61 public class IOUringImpl {
 62     private static final Arena arena = Arena.ofAuto();
 63 
 64     private static final boolean TRACE = System
 65             .getProperty("sun.nio.ch.iouring.trace", "false")
 66             .equalsIgnoreCase("true");
 67     private final SubmissionQueue sq;
 68     private final CompletionQueue cq;
 69     private final int fd;               // The ringfd
 70     private int epollfd = -1;           // The epoll(7) if set
 71     private static final int INT_SIZE = (int)ValueLayout.JAVA_INT.byteSize();
 72 
 73     private final Arena autoArena = Arena.ofAuto();
 74 
 75     private final KMappedBuffers mappedBuffers;
 76 
    /**
     * Creates an IOUringImpl and initializes the ring structures.
     * {@code entries} (or the next higher power of 2) is the size of the
     * Submission Queue. Currently, the completion queue returned will be
     * double the size of the Submission queue.
     * <p>
     * Delegates to the four argument constructor with {@code cq_entries}
     * and {@code nmappedBuffers} set to 0 and {@code mappedBufsize} set
     * to -1.
     *
     * @param entries the number of submission queue entries to allocate
     * @throws IOException if an IOException occurs
     */
    public IOUringImpl(int entries) throws IOException {
        this(entries, 0, 0, -1);
    }
 86 
    /**
     * Creates an IOUringImpl and initializes the ring structures.
     * <p>
     * Delegates to the four argument constructor with
     * {@code nmappedBuffers} set to 0 and {@code mappedBufsize} set to -1.
     *
     * @param sq_entries the number of submission queue entries to allocate
     * @param cq_entries the number of completion queue entries to allocate;
     *                   a value that is not greater than zero leaves the
     *                   completion queue size unrequested
     * @throws IOException if an IOException occurs
     */
    public IOUringImpl(int sq_entries, int cq_entries) throws IOException {
        this(sq_entries, cq_entries, 0, -1);
    }
 96 
 97     /**
 98      * Creates an IOURing initializes the ring structures and allocates a
 99      * number of direct {@link ByteBuffer}s which are additionally mapped
100      * into the kernel address space.
101      *
102      * @param sq_entries the number of submission queue entries to allocate
103      * @param cq_entries the number of completion queue entries to allocate
104      * @param nmappedBuffers number of mapped direct ByteBuffers to create
105      * @param mappedBufsize size of each buffer in bytes
106      * @throws IOException if an IOException occurs
107      */
108     public IOUringImpl(int sq_entries,
109                        int cq_entries,
110                        int nmappedBuffers,
111                        int mappedBufsize) throws IOException {
112         MemorySegment params_seg = getSegmentFor(io_uring_params.$LAYOUT());
113         if (cq_entries > 0) {
114             io_uring_params.cq_entries(params_seg, cq_entries);
115             int flags = io_uring_params.flags(params_seg) | IORING_SETUP_CQSIZE();
116             io_uring_params.flags(params_seg, flags);
117         }
118 
119         // call setup
120         fd = io_uring_setup(sq_entries, params_seg);
121         if (fd < 0) {
122             throw new IOException(errorString(fd));
123         }
124 
125         mappedBuffers = new KMappedBuffers(nmappedBuffers, mappedBufsize);
126         if (nmappedBuffers > 0) {
127             mappedBuffers.register(fd);
128         }
129         // Offsets segments
130         MemorySegment cq_off_seg = io_uring_params.cq_off(params_seg);
131         MemorySegment sq_off_seg = io_uring_params.sq_off(params_seg);
132 
133         // Offsets to cqe array and the sqe index array
134         int cq_off_cqes = io_cqring_offsets.cqes(cq_off_seg);
135         int sq_off_array = io_sqring_offsets.array(sq_off_seg);
136 
137         // Acual number of entries in each Q
138         sq_entries = io_uring_params.sq_entries(params_seg);
139         cq_entries = io_uring_params.cq_entries(params_seg);
140 
141         int sq_size = sq_off_array + sq_entries * INT_SIZE;
142         int cq_size = cq_off_cqes + cq_entries * (int)io_uring_cqe.sizeof();
143 
144         boolean singleMmap = (io_uring_params.features(params_seg)
145                 & IORING_FEAT_SINGLE_MMAP()) != 0;
146 
147         if (singleMmap) {
148             if (cq_size > sq_size)
149                 sq_size = cq_size;
150             cq_size = sq_size;
151         }
152         var sqe_seg = mmap(sq_size, fd, IORING_OFF_SQ_RING());
153 
154         MemorySegment cqes_seg;
155         if (singleMmap) {
156             cqes_seg = sqe_seg;
157         } else {
158             cqes_seg = mmap(cq_size, fd, IORING_OFF_CQ_RING());
159         }
160 
161         // Masks
162         int sq_mask = sqe_seg.get(ValueLayout.JAVA_INT,
163                                   io_sqring_offsets.ring_mask(sq_off_seg));
164         int cq_mask = cqes_seg.get(ValueLayout.JAVA_INT,
165                                    io_cqring_offsets.ring_mask(cq_off_seg));
166 
167         var sqes = mmap(sq_entries * io_uring_sqe.sizeof(),
168                         fd, IORING_OFF_SQES());
169 
170         cq = new CompletionQueue(cqes_seg.asSlice(cq_off_cqes),
171                 cqes_seg.asSlice(io_cqring_offsets.head(cq_off_seg)),
172                 cqes_seg.asSlice(io_cqring_offsets.tail(cq_off_seg)),
173                 cq_mask);
174 
175         sq = new SubmissionQueue(sqe_seg.asSlice(sq_off_array),
176                 cqes_seg.asSlice(io_cqring_offsets.head(sq_off_seg)),
177                 cqes_seg.asSlice(io_cqring_offsets.tail(sq_off_seg)),
178                 sq_mask,
179                 sqes);
180         if (TRACE)
181             System.out.printf("IOUringImpl: ringfd: %d\n", fd);
182     }
183 
184 
185     public void close() throws IOException {
186         int ret;
187         SystemCallContext ctx = SystemCallContext.get();
188         try {
189             ret = (int)close_fn.invokeExact(ctx.errnoCaptureSegment(),
190                                             ringFd());
191         } catch (Throwable e) {
192             throw new RuntimeException(e);
193         }
194         ctx.throwIOExceptionOnError(ret);
195 
196     }
197 
198     public int eventfd() throws IOException {
199         int ret;
200         SystemCallContext ctx = SystemCallContext.get();
201         try {
202             ret = (int)eventfd_fn.invokeExact(ctx.errnoCaptureSegment(),
203                                             0, 0);
204         } catch (Throwable e) {
205             throw new RuntimeException(e);
206         }
207         ctx.throwIOExceptionOnError(ret);
208         return ret;
209     }
210 
211     private int initEpoll() throws IOException {
212         int ret;
213         SystemCallContext ctx = SystemCallContext.get();
214         try {
215             ret = (int)epoll_create_fn.invokeExact(ctx.errnoCaptureSegment(),
216                                                    ringFd(), 1);
217         } catch (Throwable e) {
218             throw new RuntimeException(e);
219         }
220         ctx.throwIOExceptionOnError(ret);
221         return ret;
222     }
223 
224     public void register_eventfd(int efd) throws IOException {
225         int ret;
226         SystemCallContext ctx = SystemCallContext.get();
227         MemorySegment fdseg =
228             arena.allocateFrom(ValueLayout.JAVA_INT, efd);
229 
230         try {
231             ret = (int)evregister_fn
232                     .invokeExact(
233                             ctx.errnoCaptureSegment(),
234                             NR_io_uring_register,
235                             fd, IORING_REGISTER_EVENTFD(),
236                             fdseg, 1
237                     );
238         } catch (Throwable e) {
239             throw new RuntimeException(e);
240         }
241         ctx.throwIOExceptionOnError(ret);
242     }
243 
244     public void unregister_eventfd() throws IOException {
245         int ret;
246         SystemCallContext ctx = SystemCallContext.get();
247 
248         try {
249             ret = (int)evregister_fn
250                     .invokeExact(
251                             ctx.errnoCaptureSegment(),
252                             NR_io_uring_register,
253                             fd, IORING_UNREGISTER_EVENTFD(),
254                             MemorySegment.NULL, 0
255                     );
256         } catch (Throwable e) {
257             throw new RuntimeException(e);
258         }
259         ctx.throwIOExceptionOnError(ret);
260 
261     }
262 
263     /**
264      * Asynchronously submits an Sqe to this IOUringImpl. Can be called
265      * multiple times before enter().
266      *
267      * @param sqe
268      * @throws IOException if submission q full
269      */
270     public void submit(Sqe sqe) throws IOException {
271         sq.submit(sqe);
272         if (TRACE)
273             System.out.printf("submit: %s \n", sqe);
274     }
275 
276     /**
277      * Notifies the kernel of entries on the Submission Q and waits for a
278      * number of responses (completion events). If this returns normally
279      * with value {@code n > 0}, this means that n requests have been accepted
280      * by the kernel. A normal return also means that the requested number of
281      * completion events have been received {@link #pollCompletion()} can be
282      * called {@code nreceive} times to obtain the results.
283      *
284      * @param nsubmit number of requests to submit
285      * @param nreceive block until this number of events received
286      * @param flags flags to pass to io_uring_enter
287      *
288      * @return if return value less than 0 means an error occurred. Otherwise,
289      *         the number of Sqes successfully submitted.
290      */
291     public int enter(int nsubmit, int nreceive, int flags) throws IOException {
292         if (nreceive > 0) {
293             flags |= IORING_ENTER_GETEVENTS();
294         }
295         return io_uring_enter(this.fd, nsubmit, nreceive, flags);
296     }
297 
    /**
     * Returns the allocated size of the Submission Q. If the requested size
     * was not a power of 2, then the allocated size will be the next highest
     * power of 2.
     *
     * @return the submission ring size, which is the ring mask plus one
     */
    public int sqsize() {
        return sq.ringSize;
    }
308 
    /**
     * Returns the number of free entries in the Submission Q.
     *
     * @return the ring size minus the number of entries currently between
     *         head and tail
     */
    public int sqfree() {
        // Despite its name, nUsed() computes ringSize - nEntries(), i.e.
        // the number of unused slots
        return sq.nUsed();
    }
315 
    /**
     * Returns whether the completion Q is empty or not.
     *
     * @return {@code true} if the completion queue head and tail are equal
     */
    public boolean cqempty() {
        return cq.nEntries() == 0;
    }
324 
    /**
     * Returns the allocated size of the Completion Q.
     * Currently, double the size of the Submission Q unless a completion
     * queue size was requested explicitly at construction.
     *
     * @return the completion ring size, which is the ring mask plus one
     */
    public int cqsize() {
        return cq.ringSize;
    }
334 
    /**
     * Returns the file descriptor of this ring's epoll(7) instance.
     *
     * @return the epoll fd, or -1 if no epoll operation has created it yet
     */
    public int epoll_fd() {
        return epollfd;
    }
338 
339     /**
340      * Polls the Completion Queue for results.
341      *
342      * @return a Cqe if available or {@code null}
343      */
344     public Cqe pollCompletion() {
345         Cqe cqe = cq.pollHead();
346         if (TRACE)
347             System.out.printf("pollCompletion: -> %s\n", cqe);
348         return cqe;
349     }
350 
    /**
     * Returns a String description of the given errno value. Delegates to
     * {@code Util.strerror}.
     *
     * @param errno the errno value to describe
     * @return the description produced by {@code Util.strerror}
     */
    public static String strerror(int errno) {
        return Util.strerror(errno);
    }
360 
361     private static int io_uring_setup(int entries, MemorySegment params)
362             throws IOException {
363         try {
364             return (int) setup_fn.invokeExact(NR_io_uring_setup,
365                                               entries, params);
366         } catch (Throwable t) {
367             throw ioexception(t);
368         }
369     }
370 
371     private static int io_uring_enter(int fd, int to_submit, int min_complete,
372                                       int flags) throws IOException {
373         try {
374             return (int) enter_fn.invokeExact(NR_io_uring_enter,
375                     fd, to_submit, min_complete, flags, MemorySegment.NULL);
376         } catch (Throwable t) {
377             throw ioexception(t);
378         }
379     }
380 
381     static IOException ioexception(Throwable t) {
382         if (t instanceof IOException ioe) {
383             return ioe;
384         } else {
385             return new IOException(t);
386         }
387     }
388 
    /**
     * Delegates to {@code KMappedBuffers.checkAndGetIndexForBuffer} for the
     * given buffer.
     *
     * @param buffer the buffer to look up
     * @return the value returned by the mapped buffer pool; presumably the
     *         registration index of {@code buffer} - confirm against
     *         KMappedBuffers
     */
    int checkAndGetIndexFor(ByteBuffer buffer) {
        return mappedBuffers.checkAndGetIndexForBuffer(buffer);
    }
392 
    /**
     * Returns a mapped direct ByteBuffer or {@code null} if none available.
     * Mapped buffers must be used with some IOUringImpl operations such as
     * {@code IORING_OP_WRITE_FIXED} and {@code IORING_OP_READ_FIXED}.
     * Buffers must be returned after use with
     * {@link #returnRegisteredBuffer(ByteBuffer)}.
     *
     * @return the buffer handed out by the mapped buffer pool
     */
    public ByteBuffer getRegisteredBuffer() {
        return mappedBuffers.getRegisteredBuffer();
    }
405 
    /**
     * Returns a previously allocated registered buffer to the mapped buffer
     * pool.
     *
     * @param buffer a buffer obtained from {@link #getRegisteredBuffer()}
     */
    public void returnRegisteredBuffer(ByteBuffer buffer) {
        mappedBuffers.returnRegisteredBuffer(buffer);
    }
414 
415     /**
416      * Common capabilities of SubmissionQueue and CompletionQueue
417      */
418     sealed class QueueImplBase permits SubmissionQueue, CompletionQueue {
419         protected final MemorySegment ringSeg;
420         private final MemorySegment head, tail;
421         protected final int ringMask;
422         protected final MemoryLayout ringLayout;
423         protected final int ringLayoutSize;
424         protected final int ringLayoutAlignment;
425         protected final int ringSize;
426 
427         // For accessing head and tail as volatile
428         protected final VarHandle addrH;
429 
430         /**
431          *
432          * @param ringSeg The mapped segment
433          * @param head The head pointer
434          * @param tail The tail pointer
435          * @param ringMask
436          * @param ringLayout
437          */
438         QueueImplBase(MemorySegment ringSeg, MemorySegment head,
439                       MemorySegment tail, int ringMask,
440                       MemoryLayout ringLayout) {
441             this.ringSeg = ringSeg;
442             this.head = head;
443             this.tail = tail;
444             this.ringMask = ringMask;
445             this.ringSize = ringMask + 1;
446             this.ringLayout = ringLayout;
447             this.ringLayoutSize = (int)ringLayout.byteSize();
448             this.ringLayoutAlignment = (int)ringLayout.byteAlignment();
449             this.addrH = ValueLayout.JAVA_INT.varHandle();
450         }
451 
452         int nEntries() {
453             int n = Math.abs(getTail(false) - getHead(false));
454             return n;
455         }
456 
457         boolean ringFull() {
458             return nEntries() == ringSize;
459         }
460 
461         int nUsed() {
462             return ringSize - nEntries();
463         }
464         protected int getHead(boolean withAcquire) {
465             int val = (int)(withAcquire
466                 ? addrH.getAcquire(head, 0) : addrH.get(head, 0));
467             return val;
468         }
469 
470         protected int getTail(boolean withAcquire) {
471             int val = (int)(withAcquire
472                 ? addrH.getAcquire(tail, 0L) : addrH.get(tail, 0L));
473             return val;
474         }
475 
476         // Used by CompletionQueue
477         protected void setHead(int val) {
478             addrH.setRelease(head, 0L, val);
479         }
480 
481         // Used by SubmissionQueue
482         protected void setTail(int val) {
483             addrH.setRelease(tail, 0L, val);
484         }
485     }
486 
487     final class SubmissionQueue extends QueueImplBase {
488         final MemorySegment sqes;
489         final int n_sqes;
490         static final int sqe_layout_size =
491             (int)io_uring_sqe.$LAYOUT().byteSize();
492 
493         static final int sqe_alignment =
494             (int)io_uring_sqe.$LAYOUT().byteAlignment();
495 
496         SubmissionQueue(MemorySegment ringSeg, MemorySegment head,
497                         MemorySegment tail, int mask, MemorySegment sqes) {
498             super(ringSeg, head, tail, mask, ValueLayout.JAVA_INT);
499             this.sqes = sqes;
500             this.n_sqes = (int) (sqes.byteSize() / sqe_layout_size);
501         }
502 
503         /**
504          * Submits an Sqe to Submission Q.
505          * @param sqe
506          * @throws IOException if Q full
507          */
508         public void submit(Sqe sqe) throws IOException {
509             if (ringFull()) {
510                 throw new IOException("Submission Queue full");
511             }
512 
513             int tailVal = getTail(false);
514             int tailIndex = tailVal & ringMask;
515 
516             MemorySegment slot = sqes.asSlice(
517                     (long) tailIndex * sqe_layout_size,
518                     sqe_layout_size, sqe_alignment).fill((byte)0);
519             if (slot == null)
520                 throw new IOException("Q full"); // shouldn't happen
521             // Populate the slot as an io_uring_sqe
522             // Note. Sqe has already validated that overlapping fields not set
523             io_uring_sqe.user_data(slot, sqe.user_data());
524             io_uring_sqe.fd(slot, sqe.fd());
525             io_uring_sqe.opcode(slot, (byte)sqe.opcode());
526             // This statement handles the large flags union
527             // For simplicity all __u32 variants are handled
528             // as xxx_flags. poll_events (__u16) are special
529             sqe.xxx_flags().ifPresentOrElse(
530                 u32 -> io_uring_sqe.open_flags(slot, u32),
531                 // xxx_flags not present, poll_events may be
532                 () -> sqe.poll_events().ifPresent(
533                     u16 -> io_uring_sqe.poll_events(slot, (short)u16)));
534 
535             io_uring_sqe.flags(slot, (byte)sqe.flags());
536             io_uring_sqe.addr(slot, sqe.addr()
537                         .orElse(MemorySegment.NULL).address());
538             io_uring_sqe.addr2(slot, sqe.addr2()
539                         .orElse(MemorySegment.NULL).address());
540             io_uring_sqe.buf_index(slot, (short)sqe.buf_index().orElse(0));
541             io_uring_sqe.off(slot, sqe.off().orElse(0L));
542             io_uring_sqe.len(slot, sqe.len().orElse(0));
543             // Populate the tail slot
544             ringSeg.setAtIndex(ValueLayout.JAVA_INT, tailIndex, tailIndex);
545             //Util.print(slot, "SQE");
546             setTail(++tailVal);
547         }
548     }
549 
550     final class CompletionQueue extends QueueImplBase {
551         CompletionQueue(MemorySegment ringSeg, MemorySegment head,
552                         MemorySegment tail, int mask) {
553             super(ringSeg, head, tail, mask, io_uring_cqe.$LAYOUT());
554         }
555 
556         public Cqe pollHead() {
557             int headVal = getHead(false);
558             Cqe res = null;
559             if (headVal != getTail(true)) {
560                 int index = headVal & ringMask;
561                 int offset = index * ringLayoutSize;
562                 MemorySegment seg = ringSeg.asSlice(offset,
563                         ringLayoutSize, ringLayoutAlignment);
564                 res = new Cqe(
565                         io_uring_cqe.user_data(seg),
566                         io_uring_cqe.res(seg),
567                         io_uring_cqe.flags(seg));
568                 headVal++;
569             }
570             setHead(headVal);
571             return res;
572         }
573     };
574 
    /**
     * Queues a request that adds the given fd to this ring's epoll(7)
     * instance, creating the epoll instance first if it hasn't already
     * been created. The request is placed on the Submission Queue; it is
     * not handed to the kernel by this method.
     *
     * If using the EPOLLONESHOT mode (in flags) the opaque field
     * can be used to return the "id" of the specific operation that was
     * kicked off.
     *
     * @param fd target fd to manage
     * @param poll_events bit mask of events to activate
     * @param opaque a 64 bit value to return with event notifications.
     *               A value of -1L is ignored.
     * @throws IOException if the epoll instance cannot be created or the
     *                     submission queue is full
     * @throws InterruptedException declared by the shared epoll helper
     */
    public void epoll_add(int fd, int poll_events, long opaque)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, opaque, EPOLL_CTL_ADD());
    }
594 
    /**
     * Queues a request that removes the given fd from this ring's epoll(7)
     * instance. No opaque value is supplied (-1L is passed to the shared
     * helper).
     *
     * @param fd target fd to remove
     * @param poll_events bit mask of events passed along with the request
     * @throws IOException if the epoll instance cannot be created or the
     *                     submission queue is full
     * @throws InterruptedException declared by the shared epoll helper
     */
    public void epoll_del(int fd, int poll_events)
            throws IOException, InterruptedException {
        epoll_op(fd, poll_events, -1L, EPOLL_CTL_DEL());
    }
599 
600     public void epoll_mod(int fd, int poll_events, long opaque)
601             throws IOException, InterruptedException {
602         epoll_op(fd, poll_events, opaque, EPOLL_CTL_DEL());
603     }
604 
605     private void epoll_op(int fd, int poll_events, long opaque, int op)
606             throws IOException, InterruptedException {
607         if (this.epollfd == -1) {
608             this.epollfd = initEpoll();
609         }
610 
611         MemorySegment targetfd =
612             arena.allocateFrom(ValueLayout.OfInt.JAVA_INT, fd);
613 
614         Sqe request = new Sqe()
615                 .opcode(IORING_OP_EPOLL_CTL())
616                 .fd(epollfd)
617                 .addr(targetfd)
618                 .xxx_flags(poll_events)
619                 .len(op);
620 
621         if (opaque != -1L) {
622             MemorySegment event = arena.allocate(epoll_event.$LAYOUT());
623             epoll_event.events(event, poll_events);
624             var dataSlice = epoll_event.data(event);
625             epoll_data_t.u64(dataSlice, opaque);
626             request = request.off(event.address());
627         }
628         submit(request);
629     }
630 
631     static MemorySegment getSegmentFor(MemoryLayout layout) {
632         return arena.allocate(layout.byteSize(), layout.byteAlignment())
633                     .fill((byte)0);
634     }
635 
636     static String errorString(int errno) {
637         errno = -errno;
638         return "Error: " + strerror(errno);
639     }
640 
641     // This is obsolete. There is a better way of doing a timed
642     // poll by providing a timeval to io_uring_enter
643     public Sqe getTimeoutSqe(Duration maxwait, int opcode, int completionCount) {
644         MemorySegment seg =
645             arena.allocate(__kernel_timespec.$LAYOUT()).fill((byte)(0));
646 
647         __kernel_timespec.tv_sec(seg, maxwait.getSeconds());
648         __kernel_timespec.tv_nsec(seg, maxwait.getNano());
649         return new Sqe()
650                 .opcode(opcode)
651                 .addr(seg)
652                 .xxx_flags(0)  // timeout_flags
653                 .off(completionCount)
654                 .len(1);
655     }
656 
657     private final static ValueLayout POINTER =
658         ValueLayout.ADDRESS.withTargetLayout(
659             MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)
660     );
661 
662     private static final MethodHandle mmap_fn = locateStdHandle(
663         "mmap", FunctionDescriptor.of(
664                 POINTER,
665                 //ValueLayout.JAVA_LONG, // returned address
666                 ValueLayout.JAVA_LONG, // input address, usually zero
667                 ValueLayout.JAVA_LONG, // size_t
668                 ValueLayout.JAVA_INT, // int prot (PROT_READ | PROT_WRITE)
669                 ValueLayout.JAVA_INT, // int flags (MAP_SHARED|MAP_POPULATE)
670                 ValueLayout.JAVA_INT, // int fd
671                 ValueLayout.JAVA_LONG // off_t (64bit?)
672         )
673     );
674 
675     private static final MethodHandle epoll_create_fn = locateStdHandle(
676         "epoll_create", FunctionDescriptor.of(
677                 ValueLayout.JAVA_INT, // returned fd
678                 ValueLayout.JAVA_INT // int size (ignored)
679         ), SystemCallContext.errnoLinkerOption()
680     );
681 
682     private static final MethodHandle close_fn = locateStdHandle(
683         "close",
684         FunctionDescriptor.of(ValueLayout.JAVA_INT, ValueLayout.JAVA_INT),
685         SystemCallContext.errnoLinkerOption()
686     );
687 
688     private static final MethodHandle eventfd_fn = locateStdHandle(
689         "eventfd",
690         FunctionDescriptor.of(
691             ValueLayout.JAVA_INT,
692             ValueLayout.JAVA_INT,
693             ValueLayout.JAVA_INT),
694         SystemCallContext.errnoLinkerOption()
695     );
696 
697     // Linux syscall numbers. Allows to invoke the system call
698     // directly in systems where there are no wrappers
699     // for these functions in libc or liburing.
700     // Also means we no longer use liburing
701 
702     private static final int NR_io_uring_setup = 425;
703     private static final int NR_io_uring_enter = 426;
704     private static final int NR_io_uring_register = 427;
705 
706     private static final MethodHandle setup_fn = locateStdHandle(
707         "syscall", FunctionDescriptor.of(
708                 ValueLayout.JAVA_INT,
709                 ValueLayout.JAVA_INT,
710                 ValueLayout.JAVA_INT,
711                 ValueLayout.ADDRESS)
712     );
713 
714     private static final MethodHandle enter_fn = locateStdHandle(
715         "syscall", FunctionDescriptor.of(ValueLayout.JAVA_INT,
716                 ValueLayout.JAVA_INT,
717                 ValueLayout.JAVA_INT,
718                 ValueLayout.JAVA_INT,
719                 ValueLayout.JAVA_INT,
720                 ValueLayout.JAVA_INT,
721                 ValueLayout.ADDRESS) // sigset_t UNUSED for now
722     );
723 
724     // io_uring_register specifically for
725     // IORING_REGISTER_EVENTFD and IORING_UNREGISTER_EVENTFD
726     private static final MethodHandle evregister_fn = locateStdHandle(
727             "syscall",
728             FunctionDescriptor.of(ValueLayout.JAVA_INT,  // result
729                     ValueLayout.JAVA_INT, // syscall
730                     ValueLayout.JAVA_INT, // ring fd
731                     ValueLayout.JAVA_INT, // opcode
732                     INT_POINTER,          // pointer to fd
733                     ValueLayout.JAVA_INT),// integer value 1
734             SystemCallContext.errnoLinkerOption()
735     );
736 
    // mmap constants used internally: the protection bits and mapping
    // flags that mmap() below passes to the native mmap call
    private static final int PROT_READ = 1;
    private static final int PROT_WRITE = 2;
    private static final int MAP_SHARED = 1;
    private static final int MAP_POPULATE = 0x8000;
742 
743     /**
744      * offset (when mapping IOURING segments) must be one of:
745      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQ_RING()
746      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_CQ_RING()
747      *      jdk.internal.ffi.generated.iouring.iouring_h.IORING_OFF_SQES()
748      *
749      * @param size
750      * @param fd
751      * @param offset
752      * @return
753      */
754     private static MemorySegment mmap(long size, int fd, long offset) {
755         MemorySegment seg = null;
756         try {
757             seg = (MemorySegment)mmap_fn
758                     .invokeExact(0L, size,
759                             PROT_READ | PROT_WRITE,
760                             MAP_SHARED | MAP_POPULATE,
761                             fd,
762                             offset
763                     );
764         } catch (Throwable e) {
765             throw new RuntimeException(e);
766         }
767         long addr = seg.address();
768         return seg.reinterpret(size);
769     }
770 
    /**
     * Returns the file descriptor of this ring, as returned by
     * io_uring_setup at construction.
     *
     * @return the ring fd
     */
    int ringFd() {
        return fd;
    }
774 }