8339531: Improve performance of MemorySegment::mismatch
Reviewed-by: mcimadamore
This commit is contained in:
parent
ab9b72c50a
commit
81ff91ef27
src/java.base/share/classes
java/lang/foreign
jdk/internal/foreign
test
jdk/java/foreign
micro/org/openjdk/bench/java/lang/foreign
@ -43,6 +43,7 @@ import java.util.function.Consumer;
|
|||||||
import java.util.stream.Stream;
|
import java.util.stream.Stream;
|
||||||
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
import jdk.internal.foreign.AbstractMemorySegmentImpl;
|
||||||
import jdk.internal.foreign.MemorySessionImpl;
|
import jdk.internal.foreign.MemorySessionImpl;
|
||||||
|
import jdk.internal.foreign.SegmentBulkOperations;
|
||||||
import jdk.internal.foreign.SegmentFactories;
|
import jdk.internal.foreign.SegmentFactories;
|
||||||
import jdk.internal.javac.Restricted;
|
import jdk.internal.javac.Restricted;
|
||||||
import jdk.internal.reflect.CallerSensitive;
|
import jdk.internal.reflect.CallerSensitive;
|
||||||
@ -1571,7 +1572,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
|||||||
static void copy(MemorySegment srcSegment, long srcOffset,
|
static void copy(MemorySegment srcSegment, long srcOffset,
|
||||||
MemorySegment dstSegment, long dstOffset, long bytes) {
|
MemorySegment dstSegment, long dstOffset, long bytes) {
|
||||||
|
|
||||||
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
|
SegmentBulkOperations.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
|
||||||
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
|
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
|
||||||
bytes);
|
bytes);
|
||||||
}
|
}
|
||||||
@ -2635,8 +2636,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
|
|||||||
*/
|
*/
|
||||||
static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
|
static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
|
||||||
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
|
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
|
||||||
return AbstractMemorySegmentImpl.mismatch(srcSegment, srcFromOffset, srcToOffset,
|
return SegmentBulkOperations.mismatch(
|
||||||
dstSegment, dstFromOffset, dstToOffset);
|
(AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment), srcFromOffset, srcToOffset,
|
||||||
|
(AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment), dstFromOffset, dstToOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -72,8 +72,6 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException>
|
implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException>
|
||||||
permits HeapMemorySegmentImpl, NativeMemorySegmentImpl {
|
permits HeapMemorySegmentImpl, NativeMemorySegmentImpl {
|
||||||
|
|
||||||
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
|
|
||||||
|
|
||||||
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
|
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
|
||||||
|
|
||||||
final long length;
|
final long length;
|
||||||
@ -189,53 +187,10 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
return StreamSupport.stream(spliterator(elementLayout), false);
|
return StreamSupport.stream(spliterator(elementLayout), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
|
|
||||||
// Update the value for Aarch64 once 8338975 is fixed.
|
|
||||||
private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 10 : 5);
|
|
||||||
|
|
||||||
@Override
|
|
||||||
@ForceInline
|
@ForceInline
|
||||||
|
@Override
|
||||||
public final MemorySegment fill(byte value) {
|
public final MemorySegment fill(byte value) {
|
||||||
checkReadOnly(false);
|
return SegmentBulkOperations.fill(this, value);
|
||||||
if (length == 0) {
|
|
||||||
// Implicit state check
|
|
||||||
checkValidState();
|
|
||||||
} else if (length < FILL_NATIVE_THRESHOLD) {
|
|
||||||
// 0 <= length < FILL_NATIVE_LIMIT : 0...0X...XXXX
|
|
||||||
|
|
||||||
// Handle smaller segments directly without transitioning to native code
|
|
||||||
final long u = Byte.toUnsignedLong(value);
|
|
||||||
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
|
|
||||||
|
|
||||||
int offset = 0;
|
|
||||||
// 0...0X...X000
|
|
||||||
final int limit = (int) (length & (FILL_NATIVE_THRESHOLD - 8));
|
|
||||||
for (; offset < limit; offset += 8) {
|
|
||||||
SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, longValue);
|
|
||||||
}
|
|
||||||
int remaining = (int) length - limit;
|
|
||||||
// 0...0X00
|
|
||||||
if (remaining >= 4) {
|
|
||||||
SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (int) longValue);
|
|
||||||
offset += 4;
|
|
||||||
remaining -= 4;
|
|
||||||
}
|
|
||||||
// 0...00X0
|
|
||||||
if (remaining >= 2) {
|
|
||||||
SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (short) longValue);
|
|
||||||
offset += 2;
|
|
||||||
remaining -= 2;
|
|
||||||
}
|
|
||||||
// 0...000X
|
|
||||||
if (remaining == 1) {
|
|
||||||
SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, value);
|
|
||||||
}
|
|
||||||
// We have now fully handled 0...0X...XXXX
|
|
||||||
} else {
|
|
||||||
// Handle larger segments via native calls
|
|
||||||
SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
|
|
||||||
}
|
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -244,38 +199,6 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
return asSlice(0, byteSize, byteAlignment);
|
return asSlice(0, byteSize, byteAlignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Mismatch over long lengths.
|
|
||||||
*/
|
|
||||||
public static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
|
|
||||||
Object a, long aOffset,
|
|
||||||
Object b, long bOffset,
|
|
||||||
long length) {
|
|
||||||
long off = 0;
|
|
||||||
long remaining = length;
|
|
||||||
int i, size;
|
|
||||||
boolean lastSubRange = false;
|
|
||||||
while (remaining > 7 && !lastSubRange) {
|
|
||||||
if (remaining > Integer.MAX_VALUE) {
|
|
||||||
size = Integer.MAX_VALUE;
|
|
||||||
} else {
|
|
||||||
size = (int) remaining;
|
|
||||||
lastSubRange = true;
|
|
||||||
}
|
|
||||||
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
|
|
||||||
a, aOffset + off,
|
|
||||||
b, bOffset + off,
|
|
||||||
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
|
|
||||||
if (i >= 0)
|
|
||||||
return off + i;
|
|
||||||
|
|
||||||
i = size - ~i;
|
|
||||||
off += i;
|
|
||||||
remaining -= i;
|
|
||||||
}
|
|
||||||
return ~remaining;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final ByteBuffer asByteBuffer() {
|
public final ByteBuffer asByteBuffer() {
|
||||||
checkArraySize("ByteBuffer", 1);
|
checkArraySize("ByteBuffer", 1);
|
||||||
@ -314,7 +237,7 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
}
|
}
|
||||||
|
|
||||||
@ForceInline
|
@ForceInline
|
||||||
private boolean overlaps(AbstractMemorySegmentImpl that) {
|
boolean overlaps(AbstractMemorySegmentImpl that) {
|
||||||
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
|
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
|
||||||
final long thisStart = this.unsafeGetOffset();
|
final long thisStart = this.unsafeGetOffset();
|
||||||
final long thatStart = that.unsafeGetOffset();
|
final long thatStart = that.unsafeGetOffset();
|
||||||
@ -334,7 +257,8 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
@Override
|
@Override
|
||||||
public long mismatch(MemorySegment other) {
|
public long mismatch(MemorySegment other) {
|
||||||
Objects.requireNonNull(other);
|
Objects.requireNonNull(other);
|
||||||
return MemorySegment.mismatch(this, 0, byteSize(), other, 0, other.byteSize());
|
return SegmentBulkOperations.mismatch(this, 0, byteSize(),
|
||||||
|
(AbstractMemorySegmentImpl) other, 0, other.byteSize());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@ -650,64 +574,6 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
|
|
||||||
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
|
|
||||||
|
|
||||||
@ForceInline
|
|
||||||
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
|
|
||||||
AbstractMemorySegmentImpl dst, long dstOffset,
|
|
||||||
long size) {
|
|
||||||
|
|
||||||
Utils.checkNonNegativeIndex(size, "size");
|
|
||||||
// Implicit null check for src and dst
|
|
||||||
src.checkAccess(srcOffset, size, true);
|
|
||||||
dst.checkAccess(dstOffset, size, false);
|
|
||||||
|
|
||||||
if (size <= 0) {
|
|
||||||
// Do nothing
|
|
||||||
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
|
|
||||||
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
|
|
||||||
//
|
|
||||||
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
|
|
||||||
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
|
|
||||||
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
|
|
||||||
|
|
||||||
// 0...0X...X000
|
|
||||||
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
|
|
||||||
int offset = 0;
|
|
||||||
for (; offset < limit; offset += 8) {
|
|
||||||
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
|
||||||
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
|
||||||
}
|
|
||||||
int remaining = (int) size - offset;
|
|
||||||
// 0...0X00
|
|
||||||
if (remaining >= 4) {
|
|
||||||
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
|
|
||||||
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
|
||||||
offset += 4;
|
|
||||||
remaining -= 4;
|
|
||||||
}
|
|
||||||
// 0...00X0
|
|
||||||
if (remaining >= 2) {
|
|
||||||
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
|
||||||
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
|
||||||
offset += 2;
|
|
||||||
remaining -=2;
|
|
||||||
}
|
|
||||||
// 0...000X
|
|
||||||
if (remaining == 1) {
|
|
||||||
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
|
||||||
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
|
||||||
}
|
|
||||||
// We have now fully handled 0...0X...XXXX
|
|
||||||
} else {
|
|
||||||
// For larger sizes, the transition to native code pays off
|
|
||||||
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
|
|
||||||
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
|
|
||||||
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@ForceInline
|
@ForceInline
|
||||||
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
|
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
|
||||||
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
|
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
|
||||||
@ -794,40 +660,6 @@ public abstract sealed class AbstractMemorySegmentImpl
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
|
|
||||||
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
|
|
||||||
AbstractMemorySegmentImpl srcImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment);
|
|
||||||
AbstractMemorySegmentImpl dstImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment);
|
|
||||||
long srcBytes = srcToOffset - srcFromOffset;
|
|
||||||
long dstBytes = dstToOffset - dstFromOffset;
|
|
||||||
srcImpl.checkAccess(srcFromOffset, srcBytes, true);
|
|
||||||
dstImpl.checkAccess(dstFromOffset, dstBytes, true);
|
|
||||||
|
|
||||||
long bytes = Math.min(srcBytes, dstBytes);
|
|
||||||
long i = 0;
|
|
||||||
if (bytes > 7) {
|
|
||||||
if (srcImpl.get(JAVA_BYTE, srcFromOffset) != dstImpl.get(JAVA_BYTE, dstFromOffset)) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
i = AbstractMemorySegmentImpl.vectorizedMismatchLargeForBytes(srcImpl.sessionImpl(), dstImpl.sessionImpl(),
|
|
||||||
srcImpl.unsafeGetBase(), srcImpl.unsafeGetOffset() + srcFromOffset,
|
|
||||||
dstImpl.unsafeGetBase(), dstImpl.unsafeGetOffset() + dstFromOffset,
|
|
||||||
bytes);
|
|
||||||
if (i >= 0) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
long remaining = ~i;
|
|
||||||
assert remaining < 8 : "remaining greater than 7: " + remaining;
|
|
||||||
i = bytes - remaining;
|
|
||||||
}
|
|
||||||
for (; i < bytes; i++) {
|
|
||||||
if (srcImpl.get(JAVA_BYTE, srcFromOffset + i) != dstImpl.get(JAVA_BYTE, dstFromOffset + i)) {
|
|
||||||
return i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return srcBytes != dstBytes ? bytes : -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int getScaleFactor(Buffer buffer) {
|
private static int getScaleFactor(Buffer buffer) {
|
||||||
return switch (buffer) {
|
return switch (buffer) {
|
||||||
case ByteBuffer _ -> 0;
|
case ByteBuffer _ -> 0;
|
||||||
|
@ -0,0 +1,316 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation. Oracle designates this
|
||||||
|
* particular file as subject to the "Classpath" exception as provided
|
||||||
|
* by Oracle in the LICENSE file that accompanied this code.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package jdk.internal.foreign;
|
||||||
|
|
||||||
|
import jdk.internal.misc.ScopedMemoryAccess;
|
||||||
|
import jdk.internal.util.Architecture;
|
||||||
|
import jdk.internal.util.ArraysSupport;
|
||||||
|
import jdk.internal.vm.annotation.ForceInline;
|
||||||
|
|
||||||
|
import java.lang.foreign.MemorySegment;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class contains optimized bulk operation methods that operate on one or several
|
||||||
|
* memory segments.
|
||||||
|
* <p>
|
||||||
|
* Generally, the methods attempt to work with as-large-as-possible units of memory at
|
||||||
|
* a time.
|
||||||
|
* <p>
|
||||||
|
* It should be noted that when invoking scoped memory access get/set operations, it
|
||||||
|
* is imperative from a performance perspective to convey the sharp types from the
|
||||||
|
* call site in order for the compiler to pick the correct Unsafe access variant.
|
||||||
|
*/
|
||||||
|
public final class SegmentBulkOperations {
|
||||||
|
|
||||||
|
private SegmentBulkOperations() {}
|
||||||
|
|
||||||
|
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
|
||||||
|
|
||||||
|
// All the threshold values below MUST be a power of two and should preferably be
|
||||||
|
// greater or equal to 2^3.
|
||||||
|
|
||||||
|
// Update the FILL value for Aarch64 once 8338975 is fixed.
|
||||||
|
private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", Architecture.isAARCH64() ? 10 : 5);
|
||||||
|
private static final int NATIVE_THRESHOLD_MISMATCH = powerOfPropertyOr("mismatch", 6);
|
||||||
|
private static final int NATIVE_THRESHOLD_COPY = powerOfPropertyOr("copy", 6);
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
public static MemorySegment fill(AbstractMemorySegmentImpl dst, byte value) {
|
||||||
|
dst.checkReadOnly(false);
|
||||||
|
if (dst.length == 0) {
|
||||||
|
// Implicit state check
|
||||||
|
dst.checkValidState();
|
||||||
|
} else if (dst.length < NATIVE_THRESHOLD_FILL) {
|
||||||
|
// 0 <= length < FILL_NATIVE_LIMIT : 0...0X...XXXX
|
||||||
|
|
||||||
|
// Handle smaller segments directly without transitioning to native code
|
||||||
|
final long u = Byte.toUnsignedLong(value);
|
||||||
|
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
|
||||||
|
|
||||||
|
int offset = 0;
|
||||||
|
// 0...0X...X000
|
||||||
|
final int limit = (int) (dst.length & (NATIVE_THRESHOLD_FILL - 8));
|
||||||
|
for (; offset < limit; offset += 8) {
|
||||||
|
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, longValue, !Architecture.isLittleEndian());
|
||||||
|
}
|
||||||
|
int remaining = (int) dst.length - limit;
|
||||||
|
// 0...0X00
|
||||||
|
if (remaining >= 4) {
|
||||||
|
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (int) longValue, !Architecture.isLittleEndian());
|
||||||
|
offset += 4;
|
||||||
|
remaining -= 4;
|
||||||
|
}
|
||||||
|
// 0...00X0
|
||||||
|
if (remaining >= 2) {
|
||||||
|
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (short) longValue, !Architecture.isLittleEndian());
|
||||||
|
offset += 2;
|
||||||
|
remaining -= 2;
|
||||||
|
}
|
||||||
|
// 0...000X
|
||||||
|
if (remaining == 1) {
|
||||||
|
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, value);
|
||||||
|
}
|
||||||
|
// We have now fully handled 0...0X...XXXX
|
||||||
|
} else {
|
||||||
|
// Handle larger segments via native calls
|
||||||
|
SCOPED_MEMORY_ACCESS.setMemory(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset(), dst.length, value);
|
||||||
|
}
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
|
||||||
|
AbstractMemorySegmentImpl dst, long dstOffset,
|
||||||
|
long size) {
|
||||||
|
|
||||||
|
Utils.checkNonNegativeIndex(size, "size");
|
||||||
|
// Implicit null check for src and dst
|
||||||
|
src.checkAccess(srcOffset, size, true);
|
||||||
|
dst.checkAccess(dstOffset, size, false);
|
||||||
|
|
||||||
|
if (size <= 0) {
|
||||||
|
// Do nothing
|
||||||
|
} else if (size < NATIVE_THRESHOLD_COPY && !src.overlaps(dst)) {
|
||||||
|
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
|
||||||
|
//
|
||||||
|
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
|
||||||
|
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
|
||||||
|
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
|
||||||
|
|
||||||
|
// 0...0X...X000
|
||||||
|
final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - 8));
|
||||||
|
int offset = 0;
|
||||||
|
for (; offset < limit; offset += 8) {
|
||||||
|
final long v = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||||
|
}
|
||||||
|
int remaining = (int) size - offset;
|
||||||
|
// 0...0X00
|
||||||
|
if (remaining >= 4) {
|
||||||
|
final int v = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||||
|
offset += 4;
|
||||||
|
remaining -= 4;
|
||||||
|
}
|
||||||
|
// 0...00X0
|
||||||
|
if (remaining >= 2) {
|
||||||
|
final short v = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
|
||||||
|
offset += 2;
|
||||||
|
remaining -=2;
|
||||||
|
}
|
||||||
|
// 0...000X
|
||||||
|
if (remaining == 1) {
|
||||||
|
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
|
||||||
|
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
|
||||||
|
}
|
||||||
|
// We have now fully handled 0...0X...XXXX
|
||||||
|
} else {
|
||||||
|
// For larger sizes, the transition to native code pays off
|
||||||
|
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
|
||||||
|
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
|
||||||
|
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
public static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset, long srcToOffset,
|
||||||
|
AbstractMemorySegmentImpl dst, long dstFromOffset, long dstToOffset) {
|
||||||
|
final long srcBytes = srcToOffset - srcFromOffset;
|
||||||
|
final long dstBytes = dstToOffset - dstFromOffset;
|
||||||
|
src.checkAccess(srcFromOffset, srcBytes, true);
|
||||||
|
dst.checkAccess(dstFromOffset, dstBytes, true);
|
||||||
|
|
||||||
|
final long length = Math.min(srcBytes, dstBytes);
|
||||||
|
final boolean srcAndDstBytesDiffer = srcBytes != dstBytes;
|
||||||
|
|
||||||
|
if (length == 0) {
|
||||||
|
return srcAndDstBytesDiffer ? 0 : -1;
|
||||||
|
} else if (length < NATIVE_THRESHOLD_MISMATCH) {
|
||||||
|
return mismatch(src, srcFromOffset, dst, dstFromOffset, 0, (int) length, srcAndDstBytesDiffer);
|
||||||
|
} else {
|
||||||
|
long i;
|
||||||
|
if (SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset) !=
|
||||||
|
SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
i = vectorizedMismatchLargeForBytes(src.sessionImpl(), dst.sessionImpl(),
|
||||||
|
src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset,
|
||||||
|
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset,
|
||||||
|
length);
|
||||||
|
if (i >= 0) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
final long remaining = ~i;
|
||||||
|
assert remaining < 8 : "remaining greater than 7: " + remaining;
|
||||||
|
i = length - remaining;
|
||||||
|
return mismatch(src, srcFromOffset + i, dst, dstFromOffset + i, i, (int) remaining, srcAndDstBytesDiffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mismatch is handled in chunks of 64 (unroll of eight 8s), 8, 4, 2, and 1 byte(s).
|
||||||
|
@ForceInline
|
||||||
|
private static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset,
|
||||||
|
AbstractMemorySegmentImpl dst, long dstFromOffset,
|
||||||
|
long start, int length, boolean srcAndDstBytesDiffer) {
|
||||||
|
int offset = 0;
|
||||||
|
final int limit = length & (NATIVE_THRESHOLD_MISMATCH - 8);
|
||||||
|
for (; offset < limit; offset += 8) {
|
||||||
|
final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
if (s != d) {
|
||||||
|
return start + offset + mismatch(s, d);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int remaining = length - offset;
|
||||||
|
// 0...XXX000
|
||||||
|
for (; remaining >= 8; remaining -= 8) {
|
||||||
|
final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
if (s != d) {
|
||||||
|
return start + offset + mismatch(s, d);
|
||||||
|
}
|
||||||
|
offset += 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 0...0X00
|
||||||
|
if (remaining >= 4) {
|
||||||
|
final int s = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
final int d = SCOPED_MEMORY_ACCESS.getIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
if (s != d) {
|
||||||
|
return start + offset + mismatch(s, d);
|
||||||
|
}
|
||||||
|
offset += 4;
|
||||||
|
remaining -= 4;
|
||||||
|
}
|
||||||
|
// 0...00X0
|
||||||
|
if (remaining >= 2) {
|
||||||
|
final short s = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
final short d = SCOPED_MEMORY_ACCESS.getShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
|
||||||
|
if (s != d) {
|
||||||
|
return start + offset + mismatch(s, d);
|
||||||
|
}
|
||||||
|
offset += 2;
|
||||||
|
remaining -= 2;
|
||||||
|
}
|
||||||
|
// 0...000X
|
||||||
|
if (remaining == 1) {
|
||||||
|
final byte s = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset);
|
||||||
|
final byte d = SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset);
|
||||||
|
if (s != d) {
|
||||||
|
return start + offset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return srcAndDstBytesDiffer ? (start + length) : -1;
|
||||||
|
// We have now fully handled 0...0X...XXXX
|
||||||
|
}
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
private static int mismatch(long first, long second) {
|
||||||
|
final long x = first ^ second;
|
||||||
|
return (Architecture.isLittleEndian()
|
||||||
|
? Long.numberOfTrailingZeros(x)
|
||||||
|
: Long.numberOfLeadingZeros(x)) / 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
private static int mismatch(int first, int second) {
|
||||||
|
final int x = first ^ second;
|
||||||
|
return (Architecture.isLittleEndian()
|
||||||
|
? Integer.numberOfTrailingZeros(x)
|
||||||
|
: Integer.numberOfLeadingZeros(x)) / 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
@ForceInline
|
||||||
|
private static int mismatch(short first, short second) {
|
||||||
|
if (Architecture.isLittleEndian()) {
|
||||||
|
return ((0xff & first) == (0xff & second)) ? 1 : 0;
|
||||||
|
} else {
|
||||||
|
return ((0xff & first) == (0xff & second)) ? 0 : 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Mismatch over long lengths.
|
||||||
|
*/
|
||||||
|
private static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
|
||||||
|
Object a, long aOffset,
|
||||||
|
Object b, long bOffset,
|
||||||
|
long length) {
|
||||||
|
long off = 0;
|
||||||
|
long remaining = length;
|
||||||
|
int i, size;
|
||||||
|
boolean lastSubRange = false;
|
||||||
|
while (remaining > 7 && !lastSubRange) {
|
||||||
|
if (remaining > Integer.MAX_VALUE) {
|
||||||
|
size = Integer.MAX_VALUE;
|
||||||
|
} else {
|
||||||
|
size = (int) remaining;
|
||||||
|
lastSubRange = true;
|
||||||
|
}
|
||||||
|
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
|
||||||
|
a, aOffset + off,
|
||||||
|
b, bOffset + off,
|
||||||
|
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
|
||||||
|
if (i >= 0)
|
||||||
|
return off + i;
|
||||||
|
|
||||||
|
i = size - ~i;
|
||||||
|
off += i;
|
||||||
|
remaining -= i;
|
||||||
|
}
|
||||||
|
return ~remaining;
|
||||||
|
}
|
||||||
|
|
||||||
|
static final String PROPERTY_PATH = "java.lang.foreign.native.threshold.power.";
|
||||||
|
|
||||||
|
// The returned value is in the interval [0, 2^30]
|
||||||
|
static int powerOfPropertyOr(String name, int defaultPower) {
|
||||||
|
final int power = Integer.getInteger(PROPERTY_PATH + name, defaultPower);
|
||||||
|
return 1 << Math.clamp(power, 0, Integer.SIZE - 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -29,7 +29,9 @@
|
|||||||
|
|
||||||
import java.lang.foreign.Arena;
|
import java.lang.foreign.Arena;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
import java.lang.foreign.MemorySegment;
|
import java.lang.foreign.MemorySegment;
|
||||||
@ -122,6 +124,68 @@ public class TestMismatch {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void random() {
|
||||||
|
try (var arena = Arena.ofConfined()) {
|
||||||
|
var rnd = new Random(42);
|
||||||
|
for (int size = 1; size < 64; size++) {
|
||||||
|
// Repeat a fair number of rounds
|
||||||
|
for (int i = 0; i < 147; i++) {
|
||||||
|
var src = arena.allocate(size);
|
||||||
|
// The dst segment might be zero to eight bytes longer
|
||||||
|
var dst = arena.allocate(size + rnd.nextInt(8 + 1));
|
||||||
|
// Fill the src with random data
|
||||||
|
for (int j = 0; j < size; j++) {
|
||||||
|
src.set(ValueLayout.JAVA_BYTE, j, randomByte(rnd));
|
||||||
|
}
|
||||||
|
// copy the random data from src to dst
|
||||||
|
dst.copyFrom(src);
|
||||||
|
// Fill the rest (if any) of the dst with random data
|
||||||
|
for (long j = src.byteSize(); j < dst.byteSize(); j++) {
|
||||||
|
dst.set(ValueLayout.JAVA_BYTE, j, randomByte(rnd));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rnd.nextBoolean()) {
|
||||||
|
// In this branch, we inject one or more deviating bytes
|
||||||
|
int beginDiff = rnd.nextInt(size);
|
||||||
|
int endDiff = rnd.nextInt(beginDiff, size);
|
||||||
|
for (int d = beginDiff; d <= endDiff; d++) {
|
||||||
|
byte existing = dst.get(ValueLayout.JAVA_BYTE, d);
|
||||||
|
// Make sure we never get back the same value
|
||||||
|
byte mutatedValue;
|
||||||
|
do {
|
||||||
|
mutatedValue = randomByte(rnd);
|
||||||
|
} while (existing == mutatedValue);
|
||||||
|
dst.set(ValueLayout.JAVA_BYTE, d, mutatedValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// They are not equal and differs in position beginDiff
|
||||||
|
assertEquals(src.mismatch(dst), beginDiff);
|
||||||
|
assertEquals(dst.mismatch(src), beginDiff);
|
||||||
|
} else {
|
||||||
|
// In this branch, there is no injection
|
||||||
|
|
||||||
|
if (src.byteSize() == dst.byteSize()) {
|
||||||
|
// The content matches and they are of equal size
|
||||||
|
assertEquals(src.mismatch(dst), -1);
|
||||||
|
assertEquals(dst.mismatch(src), -1);
|
||||||
|
} else {
|
||||||
|
// The content matches but they are of different length
|
||||||
|
// Remember, the size of src is always smaller or equal
|
||||||
|
// to the size of dst.
|
||||||
|
assertEquals(src.mismatch(dst), src.byteSize());
|
||||||
|
assertEquals(dst.mismatch(src), src.byteSize());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static byte randomByte(Random rnd) {
|
||||||
|
return (byte) rnd.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1);
|
||||||
|
}
|
||||||
|
|
||||||
@Test(dataProvider = "slices")
|
@Test(dataProvider = "slices")
|
||||||
public void testDifferentValues(MemorySegment s1, MemorySegment s2) {
|
public void testDifferentValues(MemorySegment s1, MemorySegment s2) {
|
||||||
out.format("testDifferentValues s1:%s, s2:%s\n", s1, s2);
|
out.format("testDifferentValues s1:%s, s2:%s\n", s1, s2);
|
||||||
|
@ -41,21 +41,16 @@ import java.lang.foreign.MemorySegment;
|
|||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import static java.lang.foreign.ValueLayout.*;
|
|
||||||
|
|
||||||
@BenchmarkMode(Mode.AverageTime)
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
@Fork(value = 3)
|
@Fork(value = 3)
|
||||||
public class CopyTest {
|
public class SegmentBulkCopy {
|
||||||
|
|
||||||
@Param({"0", "1", "2", "3", "4", "5", "6", "7", "8",
|
@Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
|
||||||
"9", "10", "11", "12", "13", "14", "15", "16",
|
"4096", "32768", "262144", "2097152", "16777216", "134217728"})
|
||||||
"17", "18", "19", "20", "21", "22", "23", "24",
|
|
||||||
"25", "26", "27", "28", "29", "30", "31", "32",
|
|
||||||
"33", "36", "40", "44", "48", "52", "56", "60", "63", "64", "128"})
|
|
||||||
public int ELEM_SIZE;
|
public int ELEM_SIZE;
|
||||||
|
|
||||||
byte[] srcArray;
|
byte[] srcArray;
|
||||||
@ -80,28 +75,37 @@ public class CopyTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void array_copy() {
|
public void arrayCopy() {
|
||||||
System.arraycopy(srcArray, 0, dstArray, 0, ELEM_SIZE);
|
System.arraycopy(srcArray, 0, dstArray, 0, ELEM_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void heap_segment_copy5Arg() {
|
public void bufferCopy() {
|
||||||
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Benchmark
|
|
||||||
public void native_segment_copy5Arg() {
|
|
||||||
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Benchmark
|
|
||||||
public void heap_segment_copy7arg() {
|
|
||||||
MemorySegment.copy(heapSrcSegment, JAVA_BYTE, 0, heapDstSegment, JAVA_BYTE, 0, ELEM_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Benchmark
|
|
||||||
public void buffer_copy() {
|
|
||||||
dstBuffer.put(srcBuffer);
|
dstBuffer.put(srcBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
|
||||||
|
@Benchmark
|
||||||
|
public void heapSegmentCopyJava() {
|
||||||
|
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
|
||||||
|
@Benchmark
|
||||||
|
public void heapSegmentCopyUnsafe() {
|
||||||
|
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
|
||||||
|
@Benchmark
|
||||||
|
public void nativeSegmentCopyJava() {
|
||||||
|
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
|
||||||
|
@Benchmark
|
||||||
|
public void nativeSegmentCopyUnsafe() {
|
||||||
|
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -48,13 +48,10 @@ import java.util.concurrent.TimeUnit;
|
|||||||
@State(Scope.Thread)
|
@State(Scope.Thread)
|
||||||
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
@Fork(value = 3)
|
@Fork(value = 3)
|
||||||
public class TestFill {
|
public class SegmentBulkFill {
|
||||||
|
|
||||||
@Param({"0", "1", "2", "3", "4", "5", "6", "7",
|
@Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
|
||||||
"8", "9", "10", "11", "12", "13", "14", "15",
|
"4096", "32768", "262144", "2097152", "16777216", "134217728"})
|
||||||
"16", "17", "18", "19", "20", "21", "22", "23",
|
|
||||||
"24", "25", "26", "27", "28", "29", "30", "31",
|
|
||||||
"32", "128", "256", "384", "511", "512"})
|
|
||||||
public int ELEM_SIZE;
|
public int ELEM_SIZE;
|
||||||
|
|
||||||
byte[] array;
|
byte[] array;
|
||||||
@ -73,22 +70,43 @@ public class TestFill {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void arrays_fill() {
|
public void arraysFill() {
|
||||||
Arrays.fill(array, (byte) 0);
|
Arrays.fill(array, (byte) 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void heap_segment_fill() {
|
public void heapSegmentFillJava() {
|
||||||
heapSegment.fill((byte) 0);
|
heapSegment.fill((byte) 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void native_segment_fill() {
|
public void heapSegmentFillUnsafe() {
|
||||||
|
heapSegment.fill((byte) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
|
||||||
|
@Benchmark
|
||||||
|
public void nativeSegmentFillJava() {
|
||||||
nativeSegment.fill((byte) 0);
|
nativeSegment.fill((byte) 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
|
||||||
@Benchmark
|
@Benchmark
|
||||||
public void unaligned_segment_fill() {
|
public void nativeSegmentFillUnsafe() {
|
||||||
|
nativeSegment.fill((byte) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
|
||||||
|
@Benchmark
|
||||||
|
public void unalignedSegmentFillJava() {
|
||||||
|
unalignedSegment.fill((byte) 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
|
||||||
|
@Benchmark
|
||||||
|
public void unalignedSegmentFillUnsafe() {
|
||||||
unalignedSegment.fill((byte) 0);
|
unalignedSegment.fill((byte) 0);
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,112 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.openjdk.bench.java.lang.foreign;
|
||||||
|
|
||||||
|
import org.openjdk.jmh.annotations.Benchmark;
|
||||||
|
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||||
|
import org.openjdk.jmh.annotations.Fork;
|
||||||
|
import org.openjdk.jmh.annotations.Measurement;
|
||||||
|
import org.openjdk.jmh.annotations.Mode;
|
||||||
|
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||||
|
import org.openjdk.jmh.annotations.Param;
|
||||||
|
import org.openjdk.jmh.annotations.Scope;
|
||||||
|
import org.openjdk.jmh.annotations.Setup;
|
||||||
|
import org.openjdk.jmh.annotations.State;
|
||||||
|
import org.openjdk.jmh.annotations.Warmup;
|
||||||
|
|
||||||
|
import java.lang.foreign.Arena;
|
||||||
|
import java.lang.foreign.MemorySegment;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import static java.lang.foreign.ValueLayout.*;
|
||||||
|
|
||||||
|
@BenchmarkMode(Mode.AverageTime)
|
||||||
|
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
|
||||||
|
@State(Scope.Thread)
|
||||||
|
@OutputTimeUnit(TimeUnit.NANOSECONDS)
|
||||||
|
@Fork(value = 3)
|
||||||
|
public class SegmentBulkMismatch {
|
||||||
|
|
||||||
|
@Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
|
||||||
|
"4096", "32768", "262144", "2097152", "16777216", "134217728"})
|
||||||
|
public int ELEM_SIZE;
|
||||||
|
|
||||||
|
MemorySegment srcNative;
|
||||||
|
MemorySegment dstNative;
|
||||||
|
byte[] srcArray;
|
||||||
|
byte[] dstArray;
|
||||||
|
MemorySegment srcHeap;
|
||||||
|
MemorySegment dstHeap;
|
||||||
|
|
||||||
|
@Setup
|
||||||
|
public void setup() {
|
||||||
|
// Always use the same alignment regardless of size
|
||||||
|
srcNative = Arena.ofAuto().allocate(ELEM_SIZE,16);
|
||||||
|
dstNative = Arena.ofAuto().allocate(ELEM_SIZE, 16);
|
||||||
|
var rnd = new Random(42);
|
||||||
|
for (int i = 0; i < ELEM_SIZE; i++) {
|
||||||
|
srcNative.set(JAVA_BYTE, i, (byte) rnd.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE));
|
||||||
|
}
|
||||||
|
dstNative.copyFrom(srcNative);
|
||||||
|
srcArray = srcNative.toArray(JAVA_BYTE);
|
||||||
|
dstArray = dstNative.toArray(JAVA_BYTE);
|
||||||
|
srcHeap = MemorySegment.ofArray(srcArray);
|
||||||
|
dstHeap = MemorySegment.ofArray(dstArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
|
||||||
|
@Benchmark
|
||||||
|
public long nativeSegmentJava() {
|
||||||
|
return srcNative.mismatch(dstNative);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
|
||||||
|
@Benchmark
|
||||||
|
public long heapSegmentJava() {
|
||||||
|
return srcHeap.mismatch(dstHeap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
|
||||||
|
@Benchmark
|
||||||
|
public long nativeSegmentUnsafe() {
|
||||||
|
return srcNative.mismatch(dstNative);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
|
||||||
|
@Benchmark
|
||||||
|
public long heapSegmentUnsafe() {
|
||||||
|
return srcHeap.mismatch(dstHeap);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Benchmark
|
||||||
|
public long array() {
|
||||||
|
return Arrays.mismatch(srcArray, dstArray);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user