1 /* 2 * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 */ 23 package org.openjdk.bench.java.lang.foreign; 24 25 import java.lang.foreign.*; 26 27 import org.openjdk.jmh.annotations.Benchmark; 28 import org.openjdk.jmh.annotations.BenchmarkMode; 29 import org.openjdk.jmh.annotations.Fork; 30 import org.openjdk.jmh.annotations.Measurement; 31 import org.openjdk.jmh.annotations.Mode; 32 import org.openjdk.jmh.annotations.OutputTimeUnit; 33 import org.openjdk.jmh.annotations.Setup; 34 import org.openjdk.jmh.annotations.State; 35 import org.openjdk.jmh.annotations.TearDown; 36 import org.openjdk.jmh.annotations.Warmup; 37 import sun.misc.Unsafe; 38 39 import java.nio.ByteBuffer; 40 import java.nio.ByteOrder; 41 import java.util.concurrent.TimeUnit; 42 43 import static java.lang.foreign.ValueLayout.*; 44 45 @BenchmarkMode(Mode.AverageTime) 46 @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) 47 @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) 48 @State(org.openjdk.jmh.annotations.Scope.Thread) 49 @OutputTimeUnit(TimeUnit.MILLISECONDS) 50 @Fork(3) 51 public class LoopOverNew extends JavaLayouts { 52 53 static final Unsafe unsafe = Utils.unsafe; 54 55 static final int ELEM_SIZE = 1_000_000; 56 static final int CARRIER_SIZE = (int)JAVA_INT.byteSize(); 57 static final int ALLOC_SIZE = ELEM_SIZE * CARRIER_SIZE; 58 static final MemoryLayout ALLOC_LAYOUT = MemoryLayout.sequenceLayout(ELEM_SIZE, JAVA_INT); 59 final Arena arena = Arena.ofConfined(); 60 final SegmentAllocator recyclingAlloc = SegmentAllocator.prefixAllocator(arena.allocate(ALLOC_LAYOUT)); 61 62 @TearDown 63 public void tearDown() throws Throwable { 64 arena.close(); 65 } 66 67 @Benchmark 68 public void unsafe_loop() { 69 long unsafe_addr = unsafe.allocateMemory(ALLOC_SIZE); 70 for (int i = 0; i < ELEM_SIZE; i++) { 71 unsafe.putInt(unsafe_addr + (i * CARRIER_SIZE) , i); 72 } 73 unsafe.freeMemory(unsafe_addr); 74 } 75 76 @Benchmark 77 public void segment_loop_confined() { 78 try (Arena arena = Arena.ofConfined()) { 79 MemorySegment segment = arena.allocate(ALLOC_SIZE, 4); 80 for (int i = 0; i < ELEM_SIZE; i++) { 81 VH_INT.set(segment, (long) i, i); 82 } 83 } 84 } 85 86 @Benchmark 87 public void segment_loop_shared() { 88 try (Arena arena = Arena.ofShared()) { 89 MemorySegment segment = arena.allocate(ALLOC_SIZE, 4); 90 for (int i = 0; i < ELEM_SIZE; i++) { 91 VH_INT.set(segment, (long) i, i); 92 } 93 } 94 } 95 96 @Benchmark 97 public void segment_loop_recycle() { 98 MemorySegment segment = recyclingAlloc.allocate(ALLOC_SIZE, 4); 99 for (int i = 0; i < ELEM_SIZE; i++) { 100 VH_INT.set(segment, (long) i, i); 101 } 102 } 103 104 @Benchmark 105 public void buffer_loop() { 106 ByteBuffer byteBuffer = ByteBuffer.allocateDirect(ALLOC_SIZE).order(ByteOrder.nativeOrder()); 107 for (int i = 0; i < ELEM_SIZE; i++) { 108 byteBuffer.putInt(i * CARRIER_SIZE , i); 109 } 110 unsafe.invokeCleaner(byteBuffer); 111 } 112 113 // hack to even out calls to System::gc, which allows us to compare how the implicit segment deallocation 114 // fares compared with ByteBuffer; if there's no call to System.gc() we end up comparing how well the two 115 // act under significant native memory pressure, and here the ByteBuffer API has more juice, since it features 116 // a complex exponential back off with multiple GC retries (see ByteBuffer::allocateDirect). Of course, we 117 // don't care about those cases with segments, as if clients need to allocate/free very frequently 118 // they should just use deterministic deallocation (with confined session) instead, which delivers much 119 // better performances anyway. 120 static byte gcCount = 0; 121 122 @Benchmark 123 public void buffer_loop_implicit() { 124 if (gcCount++ == 0) System.gc(); // GC when we overflow 125 ByteBuffer byteBuffer = ByteBuffer.allocateDirect(ALLOC_SIZE).order(ByteOrder.nativeOrder()); 126 for (int i = 0; i < ELEM_SIZE; i++) { 127 byteBuffer.putInt(i * CARRIER_SIZE , i); 128 } 129 } 130 131 @Benchmark 132 public void segment_loop_implicit() { 133 if (gcCount++ == 0) System.gc(); // GC when we overflow 134 Arena scope = Arena.ofAuto(); 135 MemorySegment segment = scope.allocate(ALLOC_SIZE, 4); 136 for (int i = 0; i < ELEM_SIZE; i++) { 137 VH_INT.set(segment, (long) i, i); 138 } 139 } 140 }