8270147: Increase stride size allowing unrolling more loops
Reviewed-by: kvn, iveresov
This commit is contained in:
parent
7dd19af259
commit
1f51e13ea7
@ -879,8 +879,14 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
|
|||||||
if ((future_unroll_cnt / unroll_constraint) > LoopMaxUnroll) return false;
|
if ((future_unroll_cnt / unroll_constraint) > LoopMaxUnroll) return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const int stride_con = cl->stride_con();
|
||||||
|
|
||||||
// Check for initial stride being a small enough constant
|
// Check for initial stride being a small enough constant
|
||||||
if (abs(cl->stride_con()) > (1<<2)*future_unroll_cnt) return false;
|
const int initial_stride_sz = MAX2(1<<2, Matcher::max_vector_size(T_BYTE) / 2);
|
||||||
|
// Maximum stride size should protect against overflow, when doubling stride unroll_count times
|
||||||
|
const int max_stride_size = MIN2<int>(max_jint / 2 - 2, initial_stride_sz * future_unroll_cnt);
|
||||||
|
// No abs() use; abs(min_jint) = min_jint
|
||||||
|
if (stride_con < -max_stride_size || stride_con > max_stride_size) return false;
|
||||||
|
|
||||||
// Don't unroll if the next round of unrolling would push us
|
// Don't unroll if the next round of unrolling would push us
|
||||||
// over the expected trip count of the loop. One is subtracted
|
// over the expected trip count of the loop. One is subtracted
|
||||||
@ -906,7 +912,6 @@ bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
|
|||||||
|
|
||||||
Node *init_n = cl->init_trip();
|
Node *init_n = cl->init_trip();
|
||||||
Node *limit_n = cl->limit();
|
Node *limit_n = cl->limit();
|
||||||
int stride_con = cl->stride_con();
|
|
||||||
if (limit_n == NULL) return false; // We will dereference it below.
|
if (limit_n == NULL) return false; // We will dereference it below.
|
||||||
|
|
||||||
// Non-constant bounds.
|
// Non-constant bounds.
|
||||||
@ -2005,8 +2010,8 @@ void PhaseIdealLoop::do_unroll(IdealLoopTree *loop, Node_List &old_new, bool adj
|
|||||||
int stride_p = (stride_con > 0) ? stride_con : -stride_con;
|
int stride_p = (stride_con > 0) ? stride_con : -stride_con;
|
||||||
uint old_trip_count = loop_head->trip_count();
|
uint old_trip_count = loop_head->trip_count();
|
||||||
// Verify that unroll policy result is still valid.
|
// Verify that unroll policy result is still valid.
|
||||||
assert(old_trip_count > 1 &&
|
assert(old_trip_count > 1 && (!adjust_min_trip || stride_p <=
|
||||||
(!adjust_min_trip || stride_p <= (1<<3)*loop_head->unrolled_count()), "sanity");
|
MIN2<int>(max_jint / 2 - 2, MAX2(1<<3, Matcher::max_vector_size(T_BYTE)) * loop_head->unrolled_count())), "sanity");
|
||||||
|
|
||||||
update_main_loop_skeleton_predicates(ctrl, loop_head, init, stride_con);
|
update_main_loop_skeleton_predicates(ctrl, loop_head, init, stride_con);
|
||||||
|
|
||||||
|
@ -0,0 +1,252 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.jdk.incubator.vector;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import jdk.incubator.foreign.CLinker;
|
||||||
|
import jdk.incubator.foreign.MemoryAccess;
|
||||||
|
import jdk.incubator.foreign.MemoryAddress;
|
||||||
|
import jdk.incubator.foreign.MemorySegment;
|
||||||
|
import jdk.incubator.foreign.ResourceScope;
|
||||||
|
import jdk.incubator.vector.ByteVector;
|
||||||
|
import jdk.incubator.vector.VectorOperators;
|
||||||
|
import jdk.incubator.vector.VectorSpecies;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.CompilerControl;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@State(org.openjdk.jmh.annotations.Scope.Thread)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Fork(value = 1, jvmArgsAppend = {
|
||||||
|
"--add-modules=jdk.incubator.foreign,jdk.incubator.vector",
|
||||||
|
"-Dforeign.restricted=permit",
|
||||||
|
"--enable-native-access", "ALL-UNNAMED",
|
||||||
|
"-Djdk.incubator.vector.VECTOR_ACCESS_OOB_CHECK=1"})
|
||||||
|
public class TestLoadStoreBytes {
|
||||||
|
private static final VectorSpecies<Byte> SPECIES = VectorSpecies.ofLargestShape(byte.class);
|
||||||
|
|
||||||
|
@Param("1024")
|
||||||
|
private int size;
|
||||||
|
|
||||||
|
private byte[] srcArray;
|
||||||
|
|
||||||
|
private byte[] dstArray;
|
||||||
|
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferHeap;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferHeap;
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferNative;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferNative;
|
||||||
|
|
||||||
|
|
||||||
|
private ResourceScope implicitScope;
|
||||||
|
|
||||||
|
private MemorySegment srcSegmentImplicit;
|
||||||
|
|
||||||
|
private MemorySegment dstSegmentImplicit;
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferSegmentImplicit;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferSegmentImplicit;
|
||||||
|
|
||||||
|
|
||||||
|
private MemoryAddress srcAddress;
|
||||||
|
|
||||||
|
private MemoryAddress dstAddress;
|
||||||
|
|
||||||
|
byte[] a, b, c;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void setup() {
|
||||||
|
srcArray = new byte[size];
|
||||||
|
dstArray = srcArray.clone();
|
||||||
|
for (int i = 0; i < srcArray.length; i++) {
|
||||||
|
srcArray[i] = (byte) i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
srcBufferHeap = ByteBuffer.allocate(size);
|
||||||
|
dstBufferHeap = ByteBuffer.allocate(size);
|
||||||
|
|
||||||
|
srcBufferNative = ByteBuffer.allocateDirect(size);
|
||||||
|
dstBufferNative = ByteBuffer.allocateDirect(size);
|
||||||
|
|
||||||
|
|
||||||
|
implicitScope = ResourceScope.newImplicitScope();
|
||||||
|
srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
|
||||||
|
srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer();
|
||||||
|
dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
|
||||||
|
dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer();
|
||||||
|
|
||||||
|
|
||||||
|
srcAddress = CLinker.allocateMemory(size);
|
||||||
|
dstAddress = CLinker.allocateMemory(size);
|
||||||
|
|
||||||
|
a = new byte[size];
|
||||||
|
b = new byte[size];
|
||||||
|
c = new byte[size];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void array() {
|
||||||
|
// final var srcArray = this.srcArray;
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromArray(SPECIES, srcArray, i);
|
||||||
|
v.intoArray(dstArray, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void array2() {
|
||||||
|
// final var srcArray = this.srcArray;
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromByteArray(SPECIES, srcArray, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteArray(dstArray, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void arrayScalar() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i ++) {
|
||||||
|
var v = srcArray[i];
|
||||||
|
dstArray[i] = v;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void vectAdd1() {
|
||||||
|
var a = this.a;
|
||||||
|
var b = this.b;
|
||||||
|
var c = this.c;
|
||||||
|
|
||||||
|
for (int i = 0; i < a.length; i += SPECIES.length()) {
|
||||||
|
ByteVector av = ByteVector.fromArray(SPECIES, a, i);
|
||||||
|
ByteVector bv = ByteVector.fromArray(SPECIES, b, i);
|
||||||
|
av.lanewise(VectorOperators.ADD, bv).intoArray(c, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void vectAdd2() {
|
||||||
|
var a = this.a;
|
||||||
|
var b = this.b;
|
||||||
|
var c = this.c;
|
||||||
|
|
||||||
|
for (int i = 0; i < a.length/SPECIES.length(); i++) {
|
||||||
|
ByteVector av = ByteVector.fromArray(SPECIES, a, (i*SPECIES.length()));
|
||||||
|
ByteVector bv = ByteVector.fromArray(SPECIES, b, (i*SPECIES.length()));
|
||||||
|
av.lanewise(VectorOperators.ADD, bv).intoArray(c, (i*SPECIES.length()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void arrayAdd() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromArray(SPECIES, srcArray, i);
|
||||||
|
v = v.add(v);
|
||||||
|
v.intoArray(dstArray, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferHeap() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferHeapScalar() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
|
||||||
|
var v = srcBufferHeap.get(i);
|
||||||
|
dstBufferHeap.put(i, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
@CompilerControl(CompilerControl.Mode.PRINT)
|
||||||
|
public void bufferNative() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferNativeScalar() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
|
||||||
|
var v = srcBufferNative.get(i);
|
||||||
|
dstBufferNative.put(i, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferSegmentImplicit() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
@CompilerControl(CompilerControl.Mode.PRINT)
|
||||||
|
public void segmentImplicitScalar() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i++) {
|
||||||
|
var v = MemoryAccess.getByteAtOffset(srcSegmentImplicit, i);
|
||||||
|
MemoryAccess.setByteAtOffset(dstSegmentImplicit, i, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferSegmentConfined() {
|
||||||
|
try (final var scope = ResourceScope.newConfinedScope()) {
|
||||||
|
final var srcBufferSegmentConfined = srcAddress.asSegment(size, scope).asByteBuffer();
|
||||||
|
final var dstBufferSegmentConfined = dstAddress.asSegment(size, scope).asByteBuffer();
|
||||||
|
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ByteVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,228 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.openjdk.bench.jdk.incubator.vector;
|
||||||
|
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import jdk.incubator.foreign.CLinker;
|
||||||
|
import jdk.incubator.foreign.MemoryAddress;
|
||||||
|
import jdk.incubator.foreign.MemorySegment;
|
||||||
|
import jdk.incubator.foreign.ResourceScope;
|
||||||
|
import jdk.incubator.vector.ShortVector;
|
||||||
|
import jdk.incubator.vector.VectorOperators;
|
||||||
|
import jdk.incubator.vector.VectorSpecies;
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.CompilerControl;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.TearDown;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@State(org.openjdk.jmh.annotations.Scope.Thread)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Fork(value = 1, jvmArgsAppend = {
|
||||||
|
"--add-modules=jdk.incubator.foreign,jdk.incubator.vector",
|
||||||
|
"-Dforeign.restricted=permit",
|
||||||
|
"--enable-native-access", "ALL-UNNAMED"})
|
||||||
|
public class TestLoadStoreShort {
|
||||||
|
private static final VectorSpecies<Short> SPECIES = VectorSpecies.ofLargestShape(short.class);
|
||||||
|
|
||||||
|
@Param("256")
|
||||||
|
private int size;
|
||||||
|
|
||||||
|
private int longSize;
|
||||||
|
|
||||||
|
private short[] srcArray;
|
||||||
|
|
||||||
|
private short[] dstArray;
|
||||||
|
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferHeap;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferHeap;
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferNative;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferNative;
|
||||||
|
|
||||||
|
|
||||||
|
private ResourceScope implicitScope;
|
||||||
|
|
||||||
|
private MemorySegment srcSegmentImplicit;
|
||||||
|
|
||||||
|
private MemorySegment dstSegmentImplicit;
|
||||||
|
|
||||||
|
private ByteBuffer srcBufferSegmentImplicit;
|
||||||
|
|
||||||
|
private ByteBuffer dstBufferSegmentImplicit;
|
||||||
|
|
||||||
|
|
||||||
|
private MemoryAddress srcAddress;
|
||||||
|
|
||||||
|
private MemoryAddress dstAddress;
|
||||||
|
|
||||||
|
// private byte[] bigArray = new byte[Integer.MAX_VALUE];
|
||||||
|
|
||||||
|
private volatile short[] a, b, c;
|
||||||
|
@Setup
|
||||||
|
public void setup() {
|
||||||
|
var longSize = size / Short.BYTES;
|
||||||
|
srcArray = new short[longSize];
|
||||||
|
dstArray = srcArray.clone();
|
||||||
|
for (int i = 0; i < srcArray.length; i++) {
|
||||||
|
srcArray[i] = (short) i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
srcBufferHeap = ByteBuffer.allocate(size);
|
||||||
|
dstBufferHeap = ByteBuffer.allocate(size);
|
||||||
|
|
||||||
|
srcBufferNative = ByteBuffer.allocateDirect(size);
|
||||||
|
dstBufferNative = ByteBuffer.allocateDirect(size);
|
||||||
|
|
||||||
|
|
||||||
|
implicitScope = ResourceScope.newImplicitScope();
|
||||||
|
srcSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
|
||||||
|
srcBufferSegmentImplicit = srcSegmentImplicit.asByteBuffer();
|
||||||
|
dstSegmentImplicit = MemorySegment.allocateNative(size, SPECIES.vectorByteSize(), implicitScope);
|
||||||
|
dstBufferSegmentImplicit = dstSegmentImplicit.asByteBuffer();
|
||||||
|
|
||||||
|
|
||||||
|
srcAddress = CLinker.allocateMemory(size);
|
||||||
|
dstAddress = CLinker.allocateMemory(size);
|
||||||
|
|
||||||
|
this.longSize = longSize;
|
||||||
|
|
||||||
|
a = new short[size];
|
||||||
|
b = new short[size];
|
||||||
|
c = new short[size];
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@TearDown
|
||||||
|
public void tearDown() {
|
||||||
|
CLinker.freeMemory(srcAddress);
|
||||||
|
CLinker.freeMemory(dstAddress);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
@CompilerControl(CompilerControl.Mode.PRINT)
|
||||||
|
public void array() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromArray(SPECIES, srcArray, i);
|
||||||
|
v.intoArray(dstArray, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void vectAdd1() {
|
||||||
|
var a = this.a;
|
||||||
|
var b = this.b;
|
||||||
|
var c = this.c;
|
||||||
|
|
||||||
|
for (int i = 0; i < a.length; i += SPECIES.length()) {
|
||||||
|
ShortVector av = ShortVector.fromArray(SPECIES, a, i);
|
||||||
|
ShortVector bv = ShortVector.fromArray(SPECIES, b, i);
|
||||||
|
av.lanewise(VectorOperators.ADD, bv).intoArray(c, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void vectAdd2() {
|
||||||
|
var a = this.a;
|
||||||
|
var b = this.b;
|
||||||
|
var c = this.c;
|
||||||
|
|
||||||
|
for (int i = 0; i < a.length/SPECIES.length(); i++) {
|
||||||
|
ShortVector av = ShortVector.fromArray(SPECIES, a, (i*SPECIES.length()));
|
||||||
|
ShortVector bv = ShortVector.fromArray(SPECIES, b, (i*SPECIES.length()));
|
||||||
|
av.lanewise(VectorOperators.ADD, bv).intoArray(c, (i*SPECIES.length()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void arrayAdd() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromArray(SPECIES, srcArray, i);
|
||||||
|
v = v.add(v);
|
||||||
|
v.intoArray(dstArray, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferHeap() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(longSize); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromByteBuffer(SPECIES, srcBufferHeap, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferHeap, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferNative() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferNativeAdd() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromByteBuffer(SPECIES, srcBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
v = v.add(v);
|
||||||
|
v.intoByteBuffer(dstBufferNative, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferSegmentImplicit() {
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromByteBuffer(SPECIES, srcBufferSegmentImplicit, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferSegmentImplicit, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public void bufferSegmentConfined() {
|
||||||
|
try (final var scope = ResourceScope.newConfinedScope()) {
|
||||||
|
final var srcBufferSegmentConfined = srcAddress.asSegment(size, scope).asByteBuffer();
|
||||||
|
final var dstBufferSegmentConfined = dstAddress.asSegment(size, scope).asByteBuffer();
|
||||||
|
|
||||||
|
for (int i = 0; i < SPECIES.loopBound(srcArray.length); i += SPECIES.length()) {
|
||||||
|
var v = ShortVector.fromByteBuffer(SPECIES, srcBufferSegmentConfined, i, ByteOrder.nativeOrder());
|
||||||
|
v.intoByteBuffer(dstBufferSegmentConfined, i, ByteOrder.nativeOrder());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user