1 /*
  2  * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  */
 23 
 24 package org.openjdk.bench.jdk.incubator.foreign;
 25 
 26 import jdk.incubator.foreign.MemoryLayout;
 27 import jdk.incubator.foreign.ResourceScope;
 28 import jdk.incubator.foreign.SequenceLayout;
 29 import jdk.incubator.foreign.ValueLayout;
 30 import sun.misc.Unsafe;
 31 import org.openjdk.jmh.annotations.Benchmark;
 32 import org.openjdk.jmh.annotations.BenchmarkMode;
 33 import org.openjdk.jmh.annotations.Fork;
 34 import org.openjdk.jmh.annotations.Measurement;
 35 import org.openjdk.jmh.annotations.Mode;
 36 import org.openjdk.jmh.annotations.OutputTimeUnit;
 37 import org.openjdk.jmh.annotations.Setup;
 38 import org.openjdk.jmh.annotations.State;
 39 import org.openjdk.jmh.annotations.TearDown;
 40 import org.openjdk.jmh.annotations.Warmup;
 41 
 42 import jdk.incubator.foreign.MemorySegment;
 43 import java.lang.invoke.VarHandle;
 44 import java.util.LinkedList;
 45 import java.util.List;
 46 import java.util.Optional;
 47 import java.util.Spliterator;
 48 import java.util.concurrent.CountedCompleter;
 49 import java.util.concurrent.RecursiveTask;
 50 import java.util.concurrent.TimeUnit;
 51 import java.util.function.Predicate;
 52 import java.util.function.ToIntFunction;
 53 
 54 import static jdk.incubator.foreign.MemoryLayout.PathElement.sequenceElement;
 55 import static jdk.incubator.foreign.ValueLayout.JAVA_INT;
 56 
 57 @BenchmarkMode(Mode.AverageTime)
 58 @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 59 @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
 60 @State(org.openjdk.jmh.annotations.Scope.Thread)
 61 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 62 @Fork(value = 3, jvmArgsAppend = { "--add-modules=jdk.incubator.foreign" })
 63 public class ParallelSum {
 64 
 65     final static int CARRIER_SIZE = 4;
 66     final static int ALLOC_SIZE = CARRIER_SIZE * 1024 * 1024 * 256;
 67     final static int ELEM_SIZE = ALLOC_SIZE / CARRIER_SIZE;
 68     static final VarHandle VH_int = MemoryLayout.sequenceLayout(JAVA_INT).varHandle(sequenceElement());
 69 
 70     final static MemoryLayout ELEM_LAYOUT = ValueLayout.JAVA_INT;
 71     final static int BULK_FACTOR = 512;
 72     final static SequenceLayout ELEM_LAYOUT_BULK = MemoryLayout.sequenceLayout(BULK_FACTOR, ELEM_LAYOUT);
 73 
 74     static final Unsafe unsafe = Utils.unsafe;
 75 
 76     MemorySegment segment;
 77     long address;
 78 
 79     @Setup
 80     public void setup() {
 81         address = unsafe.allocateMemory(ALLOC_SIZE);
 82         for (int i = 0; i < ELEM_SIZE; i++) {
 83             unsafe.putInt(address + (i * CARRIER_SIZE), i);
 84         }
 85         segment = MemorySegment.allocateNative(ALLOC_SIZE, CARRIER_SIZE, ResourceScope.newSharedScope());
 86         for (int i = 0; i < ELEM_SIZE; i++) {
 87             VH_int.set(segment, (long) i, i);
 88         }
 89     }
 90 
 91     @TearDown
 92     public void tearDown() throws Throwable {
 93         unsafe.freeMemory(address);
 94         segment.scope().close();
 95     }
 96 
 97     @Benchmark
 98     public int segment_serial() {
 99         int res = 0;
100         for (int i = 0; i < ELEM_SIZE; i++) {
101             res += (int)VH_int.get(segment, (long) i);
102         }
103         return res;
104     }
105 
106     @Benchmark
107     public int unsafe_serial() {
108         int res = 0;
109         for (int i = 0; i < ELEM_SIZE; i++) {
110             res += unsafe.getInt(address + (i * CARRIER_SIZE));
111         }
112         return res;
113     }
114 
115     @Benchmark
116     public int segment_parallel() {
117         return new SumSegment(segment.spliterator(ELEM_LAYOUT), SEGMENT_TO_INT).invoke();
118     }
119 
120     @Benchmark
121     public int segment_parallel_bulk() {
122         return new SumSegment(segment.spliterator(ELEM_LAYOUT_BULK), SEGMENT_TO_INT_BULK).invoke();
123     }
124 
125     @Benchmark
126     public int segment_stream_parallel() {
127         return segment.elements(ELEM_LAYOUT).parallel().mapToInt(SEGMENT_TO_INT).sum();
128     }
129 
130     @Benchmark
131     public int segment_stream_parallel_bulk() {
132         return segment.elements(ELEM_LAYOUT_BULK).parallel().mapToInt(SEGMENT_TO_INT_BULK).sum();
133     }
134 
135     final static ToIntFunction<MemorySegment> SEGMENT_TO_INT = slice ->
136             (int) VH_int.get(slice, 0L);
137 
138     final static ToIntFunction<MemorySegment> SEGMENT_TO_INT_BULK = slice -> {
139         int res = 0;
140         for (int i = 0; i < BULK_FACTOR ; i++) {
141             res += (int)VH_int.get(slice, (long) i);
142         }
143         return res;
144     };
145 
146     @Benchmark
147     public Optional<MemorySegment> segment_stream_findany_serial() {
148         return segment.elements(ELEM_LAYOUT)
149                 .filter(FIND_SINGLE)
150                 .findAny();
151     }
152 
153     @Benchmark
154     public Optional<MemorySegment> segment_stream_findany_parallel() {
155         return segment.elements(ELEM_LAYOUT).parallel()
156                 .filter(FIND_SINGLE)
157                 .findAny();
158     }
159 
160     @Benchmark
161     public Optional<MemorySegment> segment_stream_findany_serial_bulk() {
162         return segment.elements(ELEM_LAYOUT_BULK)
163                 .filter(FIND_BULK)
164                 .findAny();
165     }
166 
167     @Benchmark
168     public Optional<MemorySegment> segment_stream_findany_parallel_bulk() {
169         return segment.elements(ELEM_LAYOUT_BULK).parallel()
170                 .filter(FIND_BULK)
171                 .findAny();
172     }
173 
174     final static Predicate<MemorySegment> FIND_SINGLE = slice ->
175             (int)VH_int.get(slice, 0L) == (ELEM_SIZE - 1);
176 
177     final static Predicate<MemorySegment> FIND_BULK = slice -> {
178         for (int i = 0; i < BULK_FACTOR ; i++) {
179             if ((int)VH_int.get(slice, (long)i) == (ELEM_SIZE - 1)) {
180                 return true;
181             }
182         }
183         return false;
184     };
185 
186     @Benchmark
187     public int unsafe_parallel() {
188         return new SumUnsafe(address, 0, ALLOC_SIZE / CARRIER_SIZE).invoke();
189     }
190 
191     static class SumUnsafe extends RecursiveTask<Integer> {
192 
193         final static int SPLIT_THRESHOLD = 4 * 1024 * 8;
194 
195         private final long address;
196         private final int start, length;
197 
198         SumUnsafe(long address, int start, int length) {
199             this.address = address;
200             this.start = start;
201             this.length = length;
202         }
203 
204         @Override
205         protected Integer compute() {
206             if (length > SPLIT_THRESHOLD) {
207                 int rem = length % 2;
208                 int split = length / 2;
209                 int lobound = split;
210                 int hibound = lobound + rem;
211                 SumUnsafe s1 = new SumUnsafe(address, start, lobound);
212                 SumUnsafe s2 = new SumUnsafe(address, start + lobound, hibound);
213                 s1.fork();
214                 s2.fork();
215                 return s1.join() + s2.join();
216             } else {
217                 int res = 0;
218                 for (int i = 0; i < length; i ++) {
219                     res += unsafe.getInt(address + (start + i) * CARRIER_SIZE);
220                 }
221                 return res;
222             }
223         }
224     }
225 
226     static class SumSegment extends CountedCompleter<Integer> {
227 
228         final static int SPLIT_THRESHOLD = 1024 * 8;
229 
230         int localSum = 0;
231         private final ToIntFunction<MemorySegment> mapper;
232         List<SumSegment> children = new LinkedList<>();
233 
234         private Spliterator<MemorySegment> segmentSplitter;
235 
236         SumSegment(Spliterator<MemorySegment> segmentSplitter, ToIntFunction<MemorySegment> mapper) {
237             this(null, segmentSplitter, mapper);
238         }
239 
240         SumSegment(SumSegment parent, Spliterator<MemorySegment> segmentSplitter, ToIntFunction<MemorySegment> mapper) {
241             super(parent);
242             this.segmentSplitter = segmentSplitter;
243             this.mapper = mapper;
244         }
245 
246         @Override
247         public void compute() {
248             Spliterator<MemorySegment> sub;
249             while (segmentSplitter.estimateSize() > SPLIT_THRESHOLD &&
250                     (sub = segmentSplitter.trySplit()) != null) {
251                 addToPendingCount(1);
252                 SumSegment child = new SumSegment(this, sub, mapper);
253                 children.add(child);
254                 child.fork();
255             }
256             segmentSplitter.forEachRemaining(s -> {
257                 localSum += mapper.applyAsInt(s);
258             });
259             propagateCompletion();
260         }
261 
262         @Override
263         public Integer getRawResult() {
264             int sum = localSum;
265             for (SumSegment c : children) {
266                 sum += c.getRawResult();
267             }
268             children = null;
269             return sum;
270         }
271     }
272 }