1 /*
  2  * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.  Oracle designates this
  8  * particular file as subject to the "Classpath" exception as provided
  9  * by Oracle in the LICENSE file that accompanied this code.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  */
 25 package org.openjdk.bench.jdk.incubator.vector.operation;
 26 
 27 import jdk.incubator.vector.*;
 28 
 29 import org.openjdk.jmh.annotations.*;
 30 
 31 import java.util.concurrent.TimeUnit;
 32 
 33 /**
 34  * Inspired by "Sorting an AVX512 register"
 35  *   http://0x80.pl/articles/avx512-sort-register.html
 36  */
 37 @BenchmarkMode(Mode.Throughput)
 38 @Warmup(iterations = 3, time = 1)
 39 @Measurement(iterations = 5, time = 1)
 40 @OutputTimeUnit(TimeUnit.MILLISECONDS)
 41 @State(Scope.Benchmark)
 42 @Fork(value = 1, jvmArgsPrepend = {"--add-modules=jdk.incubator.vector"})
 43 public class SortVector extends AbstractVectorBenchmark {
 44     @Param({"64", "1024", "65536"})
 45     int size;
 46 
 47     int[] in, out;
 48 
 49     @Setup
 50     public void setup() {
 51         size = size + (size % 16); // FIXME: process tails
 52         in  = fillInt(size, i -> RANDOM.nextInt());
 53         out = new int[size];
 54     }
 55 
 56     @Benchmark
 57     public void sortVectorI64() {
 58         sort(I64);
 59     }
 60 
 61     @Benchmark
 62     public void sortVectorI128() {
 63         sort(I128);
 64     }
 65 
 66     @Benchmark
 67     public void sortVectorI256() {
 68         sort(I256);
 69     }
 70 
 71     @Benchmark
 72     public void sortVectorI512() {
 73         sort(I512);
 74     }
 75 
 76 
 77     void sort(VectorSpecies<Integer> spec) {
 78         var iota = (IntVector) VectorShuffle.iota(spec, 0, 1, false).toVector(); // [ 0 1 ... n ]
 79 
 80         var result = IntVector.broadcast(spec, 0);
 81         var index = IntVector.broadcast(spec, 0);
 82         var incr = IntVector.broadcast(spec, 1);
 83 
 84         for (int i = 0; i < in.length; i += spec.length()) {
 85             var input = IntVector.fromArray(spec, in, i);
 86 
 87             for (int j = 0; j < input.length(); j++) {
 88                 var shuf = index.toShuffle().wrapIndexes();
 89                 var b = input.rearrange(shuf); // broadcast j-th element
 90                 var lt = input.lt(b).trueCount();
 91                 var eq = input.eq(b).trueCount();
 92 
 93                 // int/long -> mask?
 94                 // int m = (1 << (lt + eq)) - (1 << lt);
 95                 // var mask = masks[lt + eq].lanewise(VectorOperators.XOR, masks[lt]);
 96                 // var mask = masks[lt + eq].and(masks[lt].not());
 97                 //
 98                 // masks[i] =  [ 0 0 ... 0 1 ... 1 ]
 99                 //                      i-th
100                 var m = iota.lt(spec.broadcast(lt + eq)).and(iota.lt(spec.broadcast(lt)).not());
101 
102                 result = result.blend(b, m);
103                 index = index.add(incr);
104             }
105             result.intoArray(out, i);
106         }
107     }
108 }