8339531: Improve performance of MemorySegment::mismatch

Reviewed-by: mcimadamore
This commit is contained in:
Per Minborg 2024-09-12 18:31:08 +00:00
parent ab9b72c50a
commit 81ff91ef27
7 changed files with 559 additions and 211 deletions
src/java.base/share/classes
test
jdk/java/foreign
micro/org/openjdk/bench/java/lang/foreign

@ -43,6 +43,7 @@ import java.util.function.Consumer;
import java.util.stream.Stream; import java.util.stream.Stream;
import jdk.internal.foreign.AbstractMemorySegmentImpl; import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.foreign.MemorySessionImpl; import jdk.internal.foreign.MemorySessionImpl;
import jdk.internal.foreign.SegmentBulkOperations;
import jdk.internal.foreign.SegmentFactories; import jdk.internal.foreign.SegmentFactories;
import jdk.internal.javac.Restricted; import jdk.internal.javac.Restricted;
import jdk.internal.reflect.CallerSensitive; import jdk.internal.reflect.CallerSensitive;
@ -1571,7 +1572,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
static void copy(MemorySegment srcSegment, long srcOffset, static void copy(MemorySegment srcSegment, long srcOffset,
MemorySegment dstSegment, long dstOffset, long bytes) { MemorySegment dstSegment, long dstOffset, long bytes) {
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset, SegmentBulkOperations.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
(AbstractMemorySegmentImpl) dstSegment, dstOffset, (AbstractMemorySegmentImpl) dstSegment, dstOffset,
bytes); bytes);
} }
@ -2635,8 +2636,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
*/ */
static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset, static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) { MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
return AbstractMemorySegmentImpl.mismatch(srcSegment, srcFromOffset, srcToOffset, return SegmentBulkOperations.mismatch(
dstSegment, dstFromOffset, dstToOffset); (AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment), srcFromOffset, srcToOffset,
(AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment), dstFromOffset, dstToOffset);
} }
/** /**

@ -72,8 +72,6 @@ public abstract sealed class AbstractMemorySegmentImpl
implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException> implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException>
permits HeapMemorySegmentImpl, NativeMemorySegmentImpl { permits HeapMemorySegmentImpl, NativeMemorySegmentImpl {
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess(); static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
final long length; final long length;
@ -189,53 +187,10 @@ public abstract sealed class AbstractMemorySegmentImpl
return StreamSupport.stream(spliterator(elementLayout), false); return StreamSupport.stream(spliterator(elementLayout), false);
} }
// FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
// Update the value for Aarch64 once 8338975 is fixed.
private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 10 : 5);
@Override
@ForceInline @ForceInline
@Override
public final MemorySegment fill(byte value) { public final MemorySegment fill(byte value) {
checkReadOnly(false); return SegmentBulkOperations.fill(this, value);
if (length == 0) {
// Implicit state check
checkValidState();
} else if (length < FILL_NATIVE_THRESHOLD) {
// 0 <= length < FILL_NATIVE_LIMIT : 0...0X...XXXX
// Handle smaller segments directly without transitioning to native code
final long u = Byte.toUnsignedLong(value);
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
int offset = 0;
// 0...0X...X000
final int limit = (int) (length & (FILL_NATIVE_THRESHOLD - 8));
for (; offset < limit; offset += 8) {
SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, longValue);
}
int remaining = (int) length - limit;
// 0...0X00
if (remaining >= 4) {
SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (int) longValue);
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (short) longValue);
offset += 2;
remaining -= 2;
}
// 0...000X
if (remaining == 1) {
SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, value);
}
// We have now fully handled 0...0X...XXXX
} else {
// Handle larger segments via native calls
SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
}
return this;
} }
@Override @Override
@ -244,38 +199,6 @@ public abstract sealed class AbstractMemorySegmentImpl
return asSlice(0, byteSize, byteAlignment); return asSlice(0, byteSize, byteAlignment);
} }
/**
* Mismatch over long lengths.
*/
public static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
Object a, long aOffset,
Object b, long bOffset,
long length) {
long off = 0;
long remaining = length;
int i, size;
boolean lastSubRange = false;
while (remaining > 7 && !lastSubRange) {
if (remaining > Integer.MAX_VALUE) {
size = Integer.MAX_VALUE;
} else {
size = (int) remaining;
lastSubRange = true;
}
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
a, aOffset + off,
b, bOffset + off,
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
if (i >= 0)
return off + i;
i = size - ~i;
off += i;
remaining -= i;
}
return ~remaining;
}
@Override @Override
public final ByteBuffer asByteBuffer() { public final ByteBuffer asByteBuffer() {
checkArraySize("ByteBuffer", 1); checkArraySize("ByteBuffer", 1);
@ -314,7 +237,7 @@ public abstract sealed class AbstractMemorySegmentImpl
} }
@ForceInline @ForceInline
private boolean overlaps(AbstractMemorySegmentImpl that) { boolean overlaps(AbstractMemorySegmentImpl that) {
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
final long thisStart = this.unsafeGetOffset(); final long thisStart = this.unsafeGetOffset();
final long thatStart = that.unsafeGetOffset(); final long thatStart = that.unsafeGetOffset();
@ -334,7 +257,8 @@ public abstract sealed class AbstractMemorySegmentImpl
@Override @Override
public long mismatch(MemorySegment other) { public long mismatch(MemorySegment other) {
Objects.requireNonNull(other); Objects.requireNonNull(other);
return MemorySegment.mismatch(this, 0, byteSize(), other, 0, other.byteSize()); return SegmentBulkOperations.mismatch(this, 0, byteSize(),
(AbstractMemorySegmentImpl) other, 0, other.byteSize());
} }
@Override @Override
@ -650,64 +574,6 @@ public abstract sealed class AbstractMemorySegmentImpl
} }
} }
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
@ForceInline
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
AbstractMemorySegmentImpl dst, long dstOffset,
long size) {
Utils.checkNonNegativeIndex(size, "size");
// Implicit null check for src and dst
src.checkAccess(srcOffset, size, true);
dst.checkAccess(dstOffset, size, false);
if (size <= 0) {
// Do nothing
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
//
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
// 0...0X...X000
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
int offset = 0;
for (; offset < limit; offset += 8) {
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
int remaining = (int) size - offset;
// 0...0X00
if (remaining >= 4) {
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 2;
remaining -=2;
}
// 0...000X
if (remaining == 1) {
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
// We have now fully handled 0...0X...XXXX
} else {
// For larger sizes, the transition to native code pays off
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
}
}
@ForceInline @ForceInline
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset, public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset, MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
@ -794,40 +660,6 @@ public abstract sealed class AbstractMemorySegmentImpl
} }
} }
public static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
AbstractMemorySegmentImpl srcImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment);
AbstractMemorySegmentImpl dstImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment);
long srcBytes = srcToOffset - srcFromOffset;
long dstBytes = dstToOffset - dstFromOffset;
srcImpl.checkAccess(srcFromOffset, srcBytes, true);
dstImpl.checkAccess(dstFromOffset, dstBytes, true);
long bytes = Math.min(srcBytes, dstBytes);
long i = 0;
if (bytes > 7) {
if (srcImpl.get(JAVA_BYTE, srcFromOffset) != dstImpl.get(JAVA_BYTE, dstFromOffset)) {
return 0;
}
i = AbstractMemorySegmentImpl.vectorizedMismatchLargeForBytes(srcImpl.sessionImpl(), dstImpl.sessionImpl(),
srcImpl.unsafeGetBase(), srcImpl.unsafeGetOffset() + srcFromOffset,
dstImpl.unsafeGetBase(), dstImpl.unsafeGetOffset() + dstFromOffset,
bytes);
if (i >= 0) {
return i;
}
long remaining = ~i;
assert remaining < 8 : "remaining greater than 7: " + remaining;
i = bytes - remaining;
}
for (; i < bytes; i++) {
if (srcImpl.get(JAVA_BYTE, srcFromOffset + i) != dstImpl.get(JAVA_BYTE, dstFromOffset + i)) {
return i;
}
}
return srcBytes != dstBytes ? bytes : -1;
}
private static int getScaleFactor(Buffer buffer) { private static int getScaleFactor(Buffer buffer) {
return switch (buffer) { return switch (buffer) {
case ByteBuffer _ -> 0; case ByteBuffer _ -> 0;

@ -0,0 +1,316 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.internal.foreign;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.util.Architecture;
import jdk.internal.util.ArraysSupport;
import jdk.internal.vm.annotation.ForceInline;
import java.lang.foreign.MemorySegment;
/**
* This class contains optimized bulk operation methods that operate on one or several
* memory segments.
* <p>
* Generally, the methods attempt to work with as-large-as-possible units of memory at
* a time.
* <p>
* It should be noted that when invoking scoped memory access get/set operations, it
* is imperative from a performance perspective to convey the sharp types from the
* call site in order for the compiler to pick the correct Unsafe access variant.
*/
public final class SegmentBulkOperations {
private SegmentBulkOperations() {}
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
// All the threshold values below MUST be a power of two and should preferably be
// greater than or equal to 2^3.
// Update the FILL value for Aarch64 once 8338975 is fixed.
private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", Architecture.isAARCH64() ? 10 : 5);
private static final int NATIVE_THRESHOLD_MISMATCH = powerOfPropertyOr("mismatch", 6);
private static final int NATIVE_THRESHOLD_COPY = powerOfPropertyOr("copy", 6);
/**
 * Sets every byte of {@code dst} to {@code value}.
 * <p>
 * Segments shorter than {@code NATIVE_THRESHOLD_FILL} bytes are written directly using
 * unaligned 8-, 4-, 2- and 1-byte stores; all other non-empty segments are handed to
 * {@code SCOPED_MEMORY_ACCESS.setMemory}.
 *
 * @param dst   the segment to fill; it is first passed through {@code checkReadOnly(false)}
 * @param value the byte to store in every position
 * @return {@code dst}
 */
@ForceInline
public static MemorySegment fill(AbstractMemorySegmentImpl dst, byte value) {
    dst.checkReadOnly(false);
    final long byteCount = dst.length;
    if (byteCount == 0) {
        // No scoped access is made for an empty segment, so check the state explicitly
        dst.checkValidState();
        return dst;
    }
    if (byteCount >= NATIVE_THRESHOLD_FILL) {
        // Larger segments: one bulk call
        SCOPED_MEMORY_ACCESS.setMemory(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset(), byteCount, value);
        return dst;
    }

    // 0 < byteCount < NATIVE_THRESHOLD_FILL : handled without the bulk call
    final boolean bigEndian = !Architecture.isLittleEndian();
    // Replicate the byte into all eight lanes of a long (no carries, as the factor's lanes are 0x01)
    final long pattern = Byte.toUnsignedLong(value) * 0x0101_0101_0101_0101L;

    // The part of the length that is covered by whole 8-byte words
    final int wholeWords = (int) (byteCount & (NATIVE_THRESHOLD_FILL - 8));
    int pos = 0;
    while (pos < wholeWords) {
        SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + pos, pattern, bigEndian);
        pos += 8;
    }

    int tail = (int) byteCount - wholeWords;
    // Four-byte piece of the tail
    if (tail >= 4) {
        SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + pos, (int) pattern, bigEndian);
        pos += 4;
        tail -= 4;
    }
    // Two-byte piece of the tail
    if (tail >= 2) {
        SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + pos, (short) pattern, bigEndian);
        pos += 2;
        tail -= 2;
    }
    // Final single byte
    if (tail == 1) {
        SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + pos, value);
    }
    return dst;
}
/**
 * Copies {@code size} bytes from {@code src} (starting at {@code srcOffset}) to
 * {@code dst} (starting at {@code dstOffset}).
 * <p>
 * Copies shorter than {@code NATIVE_THRESHOLD_COPY} bytes between segments that do not
 * overlap are performed with unaligned 8-, 4-, 2- and 1-byte loads and stores; everything
 * else is delegated to {@code SCOPED_MEMORY_ACCESS.copyMemory}.
 *
 * @param src       the source segment
 * @param srcOffset the offset, in bytes, into {@code src}
 * @param dst       the destination segment
 * @param dstOffset the offset, in bytes, into {@code dst}
 * @param size      the number of bytes to copy; validated by
 *                  {@code Utils.checkNonNegativeIndex} and by {@code checkAccess} on both segments
 */
@ForceInline
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
                        AbstractMemorySegmentImpl dst, long dstOffset,
                        long size) {
    Utils.checkNonNegativeIndex(size, "size");
    // Dereferencing src and dst here doubles as the null check
    src.checkAccess(srcOffset, size, true);
    dst.checkAccess(dstOffset, size, false);
    if (size <= 0) {
        // Nothing to copy
        return;
    }
    if (size >= NATIVE_THRESHOLD_COPY || src.overlaps(dst)) {
        // Large or (potentially) overlapping copies go through the bulk call.
        //
        // The overlap test is made on the whole segments rather than on the two copied
        // slices: slicing first would be a bit slower and is unlikely to change the
        // outcome. One direction of overlap could in principle be tolerated by the
        // word-wise path, but it is not attempted.
        SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
                src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
                dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
        return;
    }

    // 0 < size < NATIVE_THRESHOLD_COPY and no overlap
    final boolean bigEndian = !Architecture.isLittleEndian();

    // The part of the size that is covered by whole 8-byte words
    final int wholeWords = (int) (size & (NATIVE_THRESHOLD_COPY - 8));
    int pos = 0;
    while (pos < wholeWords) {
        final long word = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + pos, bigEndian);
        SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + pos, word, bigEndian);
        pos += 8;
    }

    int tail = (int) size - pos;
    // Four-byte piece of the tail
    if (tail >= 4) {
        final int piece = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + pos, bigEndian);
        SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + pos, piece, bigEndian);
        pos += 4;
        tail -= 4;
    }
    // Two-byte piece of the tail
    if (tail >= 2) {
        final short piece = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + pos, bigEndian);
        SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + pos, piece, bigEndian);
        pos += 2;
        tail -= 2;
    }
    // Final single byte
    if (tail == 1) {
        final byte piece = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + pos);
        SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + pos, piece);
    }
}
/**
 * Finds the first position at which the byte ranges
 * {@code [srcFromOffset, srcToOffset)} of {@code src} and
 * {@code [dstFromOffset, dstToOffset)} of {@code dst} differ.
 * <p>
 * Ranges shorter than {@code NATIVE_THRESHOLD_MISMATCH} bytes are compared entirely by the
 * chunked helper. Longer ranges are compared via {@code vectorizedMismatchLargeForBytes},
 * with the chunked helper finishing off whatever tail that call reports as unchecked.
 *
 * @param src           the first segment
 * @param srcFromOffset start (inclusive) of the range in {@code src}
 * @param srcToOffset   end (exclusive) of the range in {@code src}
 * @param dst           the second segment
 * @param dstFromOffset start (inclusive) of the range in {@code dst}
 * @param dstToOffset   end (exclusive) of the range in {@code dst}
 * @return the index, relative to the two start offsets, of the first differing byte;
 *         the length of the shorter range if that range is a prefix of the longer one;
 *         or {@code -1} if both ranges have the same length and the same content
 */
@ForceInline
public static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset, long srcToOffset,
                            AbstractMemorySegmentImpl dst, long dstFromOffset, long dstToOffset) {
    final long srcBytes = srcToOffset - srcFromOffset;
    final long dstBytes = dstToOffset - dstFromOffset;
    src.checkAccess(srcFromOffset, srcBytes, true);
    dst.checkAccess(dstFromOffset, dstBytes, true);

    // Only the common prefix can be compared byte by byte
    final long common = Math.min(srcBytes, dstBytes);
    final boolean sizesDiffer = srcBytes != dstBytes;

    if (common == 0) {
        return sizesDiffer ? 0 : -1;
    }
    if (common < NATIVE_THRESHOLD_MISMATCH) {
        return mismatch(src, srcFromOffset, dst, dstFromOffset, 0, (int) common, sizesDiffer);
    }

    // Cheap early exit before setting up the vectorized comparison
    final byte srcFirst = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset);
    final byte dstFirst = SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset);
    if (srcFirst != dstFirst) {
        return 0;
    }

    final long vectorized = vectorizedMismatchLargeForBytes(src.sessionImpl(), dst.sessionImpl(),
            src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset,
            dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset,
            common);
    if (vectorized >= 0) {
        // A mismatch was found at this index
        return vectorized;
    }

    // A negative result is the complement of the number of trailing bytes left unchecked
    final long tail = ~vectorized;
    assert tail < 8 : "remaining greater than 7: " + tail;
    final long checked = common - tail;
    return mismatch(src, srcFromOffset + checked, dst, dstFromOffset + checked, checked, (int) tail, sizesDiffer);
}
/**
 * Compares {@code length} bytes of {@code src} and {@code dst}, starting at
 * {@code srcFromOffset} and {@code dstFromOffset} respectively, in chunks of
 * 8, 4, 2 and 1 byte(s).
 *
 * @param src                  the first segment
 * @param srcFromOffset        the offset in {@code src} of the first byte to compare
 * @param dst                  the second segment
 * @param dstFromOffset        the offset in {@code dst} of the first byte to compare
 * @param start                the value added to every returned index; callers pass the
 *                             number of bytes already found equal before this call
 * @param length               the number of bytes to compare
 * @param srcAndDstBytesDiffer whether the two original ranges have different lengths
 * @return {@code start} plus the index of the first differing byte; otherwise
 *         {@code start + length} if {@code srcAndDstBytesDiffer} is set, or {@code -1}
 */
@ForceInline
private static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset,
                             AbstractMemorySegmentImpl dst, long dstFromOffset,
                             long start, int length, boolean srcAndDstBytesDiffer) {
    int offset = 0;
    // The mask selects the part of length made up of whole 8-byte words while giving the
    // loop a constant upper bound. It must never be negative: the threshold is tunable
    // and may be smaller than 8, in which case (threshold - 8) would keep low bits of
    // length, yield a non-zero limit for a 1..7 byte tail, and cause an 8-byte read
    // beyond the compared range. With the mask clamped to 0, the loops below handle
    // everything correctly for any threshold.
    final int limit = length & Math.max(NATIVE_THRESHOLD_MISMATCH - 8, 0);
    for (; offset < limit; offset += 8) {
        final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
    }
    int remaining = length - offset;

    // Any whole 8-byte words not covered by the masked loop above
    for (; remaining >= 8; remaining -= 8) {
        final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 8;
    }

    // 0...0X00
    if (remaining >= 4) {
        final int s = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final int d = SCOPED_MEMORY_ACCESS.getIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 4;
        remaining -= 4;
    }
    // 0...00X0
    if (remaining >= 2) {
        final short s = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final short d = SCOPED_MEMORY_ACCESS.getShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 2;
        remaining -= 2;
    }
    // 0...000X
    if (remaining == 1) {
        final byte s = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset);
        final byte d = SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset);
        if (s != d) {
            return start + offset;
        }
    }
    // All length bytes are equal; only a difference in range length can remain
    return srcAndDstBytesDiffer ? (start + length) : -1;
}
/**
 * Returns the index (0-7), in memory order, of the first byte that differs between two
 * 8-byte words that were read in the platform's native byte order.
 * <p>
 * The callers in this class only invoke this method with {@code first != second}.
 */
@ForceInline
private static int mismatch(long first, long second) {
    final long diff = first ^ second;
    // On little-endian the first byte in memory is the least significant one,
    // on big-endian it is the most significant one
    final int equalBits = Architecture.isLittleEndian()
            ? Long.numberOfTrailingZeros(diff)
            : Long.numberOfLeadingZeros(diff);
    return equalBits / 8;
}
/**
 * Returns the index (0-3), in memory order, of the first byte that differs between two
 * 4-byte words that were read in the platform's native byte order.
 * <p>
 * The callers in this class only invoke this method with {@code first != second}.
 */
@ForceInline
private static int mismatch(int first, int second) {
    final int diff = first ^ second;
    // On little-endian the first byte in memory is the least significant one,
    // on big-endian it is the most significant one
    final int equalBits = Architecture.isLittleEndian()
            ? Integer.numberOfTrailingZeros(diff)
            : Integer.numberOfLeadingZeros(diff);
    return equalBits / 8;
}
/**
 * Returns the index (0 or 1), in memory order, of the first byte that differs between two
 * 2-byte values that were read in the platform's native byte order.
 * <p>
 * The callers in this class only invoke this method with {@code first != second}.
 * The byte that comes first in memory decides the result: if it is equal in both values,
 * the difference must be in the second byte (index 1); otherwise the result is index 0,
 * regardless of whether the second byte differs as well.
 */
@ForceInline
private static int mismatch(short first, short second) {
    if (Architecture.isLittleEndian()) {
        // Little-endian: the low-order byte is the first byte in memory
        return ((0xff & first) == (0xff & second)) ? 1 : 0;
    } else {
        // Big-endian: the high-order byte is the first byte in memory. Testing the
        // low-order byte here would report index 1 when both bytes differ.
        return ((0xff00 & first) == (0xff00 & second)) ? 1 : 0;
    }
}
/**
* Mismatch over long lengths.
* <p>
* Compares {@code length} bytes of {@code a} and {@code b} by repeatedly calling
* {@code SCOPED_MEMORY_ACCESS.vectorizedMismatch} on sub-ranges of at most
* {@code Integer.MAX_VALUE} bytes, since that call takes an {@code int} length.
*
* @param aSession the session passed along for {@code a}
* @param bSession the session passed along for {@code b}
* @param a        the base object of the first range
* @param aOffset  the offset of the first range
* @param b        the base object of the second range
* @param bOffset  the offset of the second range
* @param length   the number of bytes to compare
* @return a non-negative index (relative to the two offsets) of the first mismatch found,
*         or otherwise the bitwise complement of the number of trailing bytes that were
*         not compared and are left for the caller to check
*/
private static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
Object a, long aOffset,
Object b, long bOffset,
long length) {
// Number of bytes compared (and found equal) so far
long off = 0;
// Number of bytes not yet compared
long remaining = length;
int i, size;
boolean lastSubRange = false;
// Fewer than 8 remaining bytes are never handed to vectorizedMismatch
while (remaining > 7 && !lastSubRange) {
if (remaining > Integer.MAX_VALUE) {
size = Integer.MAX_VALUE;
} else {
size = (int) remaining;
lastSubRange = true;
}
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
a, aOffset + off,
b, bOffset + off,
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
// A non-negative result is the mismatch index within the current sub-range
if (i >= 0)
return off + i;
// Otherwise ~i is treated as the number of bytes of this sub-range that were left
// unchecked (NOTE(review): presumably the vectorizedMismatch contract - confirm),
// which makes size - ~i the number of bytes actually compared
i = size - ~i;
off += i;
remaining -= i;
}
return ~remaining;
}
// Common prefix of the system properties that tune the thresholds of this class
static final String PROPERTY_PATH = "java.lang.foreign.native.threshold.power.";

/**
 * Reads the integer system property {@code PROPERTY_PATH + name} as a power of two.
 * <p>
 * The exponent is clamped to {@code [0, 30]}, so the returned value is always a power
 * of two in the interval {@code [1, 2^30]}.
 *
 * @param name         the suffix of the property name
 * @param defaultPower the exponent to use if the property is not set to an integer
 * @return two raised to the (clamped) exponent
 */
static int powerOfPropertyOr(String name, int defaultPower) {
    final String key = PROPERTY_PATH + name;
    final int requested = Integer.getInteger(key, defaultPower);
    final int exponent = Math.clamp(requested, 0, Integer.SIZE - 2);
    return 1 << exponent;
}
}

@ -29,7 +29,9 @@
import java.lang.foreign.Arena; import java.lang.foreign.Arena;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import java.lang.foreign.MemorySegment; import java.lang.foreign.MemorySegment;
@ -122,6 +124,68 @@ public class TestMismatch {
} }
} }
/**
 * Checks {@code mismatch} in both directions on pseudo-random segment pairs.
 * <p>
 * For every source size from 1 to 63 bytes, a number of rounds is run in which the
 * destination is a copy of the source that is zero to eight bytes longer and in which,
 * in about half of the rounds, a run of bytes is altered. The random sequence is seeded
 * and therefore reproducible.
 */
@Test
public void random() {
    try (var arena = Arena.ofConfined()) {
        var rnd = new Random(42);
        for (int size = 1; size < 64; size++) {
            // A fair number of rounds per size
            for (int round = 0; round < 147; round++) {
                var src = arena.allocate(size);
                // dst is between zero and eight bytes longer than src
                var dst = arena.allocate(size + rnd.nextInt(8 + 1));

                // Random content for src ...
                for (int pos = 0; pos < size; pos++) {
                    src.set(ValueLayout.JAVA_BYTE, pos, randomByte(rnd));
                }
                // ... which is mirrored at the start of dst ...
                dst.copyFrom(src);
                // ... followed by random content in the surplus part of dst (if any)
                for (long pos = src.byteSize(); pos < dst.byteSize(); pos++) {
                    dst.set(ValueLayout.JAVA_BYTE, pos, randomByte(rnd));
                }

                final long expected;
                if (rnd.nextBoolean()) {
                    // Alter every byte in [firstDiff, lastDiff] of dst
                    int firstDiff = rnd.nextInt(size);
                    int lastDiff = rnd.nextInt(firstDiff, size);
                    for (int pos = firstDiff; pos <= lastDiff; pos++) {
                        byte current = dst.get(ValueLayout.JAVA_BYTE, pos);
                        // Draw until the replacement really is a different value
                        byte replacement = randomByte(rnd);
                        while (replacement == current) {
                            replacement = randomByte(rnd);
                        }
                        dst.set(ValueLayout.JAVA_BYTE, pos, replacement);
                    }
                    // The first deviation is at firstDiff
                    expected = firstDiff;
                } else if (src.byteSize() == dst.byteSize()) {
                    // Same content, same size
                    expected = -1;
                } else {
                    // Same content, but src (never the longer one) is a proper prefix of dst
                    expected = src.byteSize();
                }
                assertEquals(src.mismatch(dst), expected);
                assertEquals(dst.mismatch(src), expected);
            }
        }
    }
}
/**
 * Draws one value from {@code rnd} covering the full {@code byte} range,
 * {@code Byte.MIN_VALUE} to {@code Byte.MAX_VALUE} inclusive.
 */
static byte randomByte(Random rnd) {
    // The upper bound of nextInt is exclusive
    final int exclusiveUpper = Byte.MAX_VALUE + 1;
    final int drawn = rnd.nextInt(Byte.MIN_VALUE, exclusiveUpper);
    return (byte) drawn;
}
@Test(dataProvider = "slices") @Test(dataProvider = "slices")
public void testDifferentValues(MemorySegment s1, MemorySegment s2) { public void testDifferentValues(MemorySegment s1, MemorySegment s2) {
out.format("testDifferentValues s1:%s, s2:%s\n", s1, s2); out.format("testDifferentValues s1:%s, s2:%s\n", s1, s2);

@ -41,21 +41,16 @@ import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import static java.lang.foreign.ValueLayout.*;
@BenchmarkMode(Mode.AverageTime) @BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(Scope.Thread) @State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS) @OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3) @Fork(value = 3)
public class CopyTest { public class SegmentBulkCopy {
@Param({"0", "1", "2", "3", "4", "5", "6", "7", "8", @Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
"9", "10", "11", "12", "13", "14", "15", "16", "4096", "32768", "262144", "2097152", "16777216", "134217728"})
"17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32",
"33", "36", "40", "44", "48", "52", "56", "60", "63", "64", "128"})
public int ELEM_SIZE; public int ELEM_SIZE;
byte[] srcArray; byte[] srcArray;
@ -80,28 +75,37 @@ public class CopyTest {
} }
@Benchmark @Benchmark
public void array_copy() { public void arrayCopy() {
System.arraycopy(srcArray, 0, dstArray, 0, ELEM_SIZE); System.arraycopy(srcArray, 0, dstArray, 0, ELEM_SIZE);
} }
@Benchmark @Benchmark
public void heap_segment_copy5Arg() { public void bufferCopy() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Benchmark
public void native_segment_copy5Arg() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
@Benchmark
public void heap_segment_copy7arg() {
MemorySegment.copy(heapSrcSegment, JAVA_BYTE, 0, heapDstSegment, JAVA_BYTE, 0, ELEM_SIZE);
}
@Benchmark
public void buffer_copy() {
dstBuffer.put(srcBuffer); dstBuffer.put(srcBuffer);
} }
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
@Benchmark
public void heapSegmentCopyJava() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
@Benchmark
public void heapSegmentCopyUnsafe() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
@Benchmark
public void nativeSegmentCopyJava() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
@Benchmark
public void nativeSegmentCopyUnsafe() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
} }

@ -48,13 +48,10 @@ import java.util.concurrent.TimeUnit;
@State(Scope.Thread) @State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS) @OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3) @Fork(value = 3)
public class TestFill { public class SegmentBulkFill {
@Param({"0", "1", "2", "3", "4", "5", "6", "7", @Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
"8", "9", "10", "11", "12", "13", "14", "15", "4096", "32768", "262144", "2097152", "16777216", "134217728"})
"16", "17", "18", "19", "20", "21", "22", "23",
"24", "25", "26", "27", "28", "29", "30", "31",
"32", "128", "256", "384", "511", "512"})
public int ELEM_SIZE; public int ELEM_SIZE;
byte[] array; byte[] array;
@ -73,22 +70,43 @@ public class TestFill {
} }
@Benchmark @Benchmark
public void arrays_fill() { public void arraysFill() {
Arrays.fill(array, (byte) 0); Arrays.fill(array, (byte) 0);
} }
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark @Benchmark
public void heap_segment_fill() { public void heapSegmentFillJava() {
heapSegment.fill((byte) 0); heapSegment.fill((byte) 0);
} }
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark @Benchmark
public void native_segment_fill() { public void heapSegmentFillUnsafe() {
heapSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark
public void nativeSegmentFillJava() {
nativeSegment.fill((byte) 0); nativeSegment.fill((byte) 0);
} }
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark @Benchmark
public void unaligned_segment_fill() { public void nativeSegmentFillUnsafe() {
nativeSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark
public void unalignedSegmentFillJava() {
unalignedSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark
public void unalignedSegmentFillUnsafe() {
unalignedSegment.fill((byte) 0); unalignedSegment.fill((byte) 0);
} }

@ -0,0 +1,112 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package org.openjdk.bench.java.lang.foreign;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import static java.lang.foreign.ValueLayout.*;
/**
 * Benchmarks {@code MemorySegment::mismatch} on native and heap segments of various sizes
 * and compares it with {@code Arrays::mismatch} on the equivalent byte arrays.
 * <p>
 * The compared pairs always have identical content, so every invocation has to examine
 * the whole range. The {@code ...Java} and {@code ...Unsafe} variants run in forked JVMs
 * that set the {@code java.lang.foreign.native.threshold.power.mismatch} system property
 * to a very high and a very low value, respectively.
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
public class SegmentBulkMismatch {

    // Number of bytes in each compared segment/array
    @Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
            "4096", "32768", "262144", "2097152", "16777216", "134217728"})
    public int ELEM_SIZE;

    MemorySegment srcNative;
    MemorySegment dstNative;
    byte[] srcArray;
    byte[] dstArray;
    MemorySegment srcHeap;
    MemorySegment dstHeap;

    /**
     * Creates a native, an array and a heap-segment pair that all hold the same
     * pseudo-random (seeded, hence reproducible) content.
     */
    @Setup
    public void setup() {
        // Always use the same alignment regardless of size
        srcNative = Arena.ofAuto().allocate(ELEM_SIZE, 16);
        dstNative = Arena.ofAuto().allocate(ELEM_SIZE, 16);
        var rnd = new Random(42);
        for (int i = 0; i < ELEM_SIZE; i++) {
            // The upper bound of nextInt is exclusive; the + 1 makes Byte.MAX_VALUE reachable
            srcNative.set(JAVA_BYTE, i, (byte) rnd.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1));
        }
        dstNative.copyFrom(srcNative);
        srcArray = srcNative.toArray(JAVA_BYTE);
        dstArray = dstNative.toArray(JAVA_BYTE);
        srcHeap = MemorySegment.ofArray(srcArray);
        dstHeap = MemorySegment.ofArray(dstArray);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
    @Benchmark
    public long nativeSegmentJava() {
        return srcNative.mismatch(dstNative);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
    @Benchmark
    public long heapSegmentJava() {
        return srcHeap.mismatch(dstHeap);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
    @Benchmark
    public long nativeSegmentUnsafe() {
        return srcNative.mismatch(dstNative);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
    @Benchmark
    public long heapSegmentUnsafe() {
        return srcHeap.mismatch(dstHeap);
    }

    // Baseline: the same comparison on plain byte arrays
    @Benchmark
    public long array() {
        return Arrays.mismatch(srcArray, dstArray);
    }

}