8339531: Improve performance of MemorySegment::mismatch

Reviewed-by: mcimadamore
This commit is contained in:
Per Minborg 2024-09-12 18:31:08 +00:00
parent ab9b72c50a
commit 81ff91ef27
7 changed files with 559 additions and 211 deletions

View File

@ -43,6 +43,7 @@ import java.util.function.Consumer;
import java.util.stream.Stream;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
import jdk.internal.foreign.MemorySessionImpl;
import jdk.internal.foreign.SegmentBulkOperations;
import jdk.internal.foreign.SegmentFactories;
import jdk.internal.javac.Restricted;
import jdk.internal.reflect.CallerSensitive;
@ -1571,7 +1572,7 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
static void copy(MemorySegment srcSegment, long srcOffset,
MemorySegment dstSegment, long dstOffset, long bytes) {
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
SegmentBulkOperations.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
bytes);
}
@ -2635,8 +2636,9 @@ public sealed interface MemorySegment permits AbstractMemorySegmentImpl {
*/
static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
return AbstractMemorySegmentImpl.mismatch(srcSegment, srcFromOffset, srcToOffset,
dstSegment, dstFromOffset, dstToOffset);
return SegmentBulkOperations.mismatch(
(AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment), srcFromOffset, srcToOffset,
(AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment), dstFromOffset, dstToOffset);
}
/**

View File

@ -72,8 +72,6 @@ public abstract sealed class AbstractMemorySegmentImpl
implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException>
permits HeapMemorySegmentImpl, NativeMemorySegmentImpl {
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
final long length;
@ -189,53 +187,10 @@ public abstract sealed class AbstractMemorySegmentImpl
return StreamSupport.stream(spliterator(elementLayout), false);
}
// FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
// Update the value for Aarch64 once 8338975 is fixed.
private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 10 : 5);
@Override
@ForceInline
@Override
public final MemorySegment fill(byte value) {
checkReadOnly(false);
if (length == 0) {
// Implicit state check
checkValidState();
} else if (length < FILL_NATIVE_THRESHOLD) {
// 0 <= length < FILL_NATIVE_LIMIT : 0...0X...XXXX
// Handle smaller segments directly without transitioning to native code
final long u = Byte.toUnsignedLong(value);
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
int offset = 0;
// 0...0X...X000
final int limit = (int) (length & (FILL_NATIVE_THRESHOLD - 8));
for (; offset < limit; offset += 8) {
SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, longValue);
}
int remaining = (int) length - limit;
// 0...0X00
if (remaining >= 4) {
SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (int) longValue);
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (short) longValue);
offset += 2;
remaining -= 2;
}
// 0...000X
if (remaining == 1) {
SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, value);
}
// We have now fully handled 0...0X...XXXX
} else {
// Handle larger segments via native calls
SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
}
return this;
return SegmentBulkOperations.fill(this, value);
}
@Override
@ -244,38 +199,6 @@ public abstract sealed class AbstractMemorySegmentImpl
return asSlice(0, byteSize, byteAlignment);
}
/**
* Mismatch over long lengths.
*/
public static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
Object a, long aOffset,
Object b, long bOffset,
long length) {
long off = 0;
long remaining = length;
int i, size;
boolean lastSubRange = false;
while (remaining > 7 && !lastSubRange) {
if (remaining > Integer.MAX_VALUE) {
size = Integer.MAX_VALUE;
} else {
size = (int) remaining;
lastSubRange = true;
}
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
a, aOffset + off,
b, bOffset + off,
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
if (i >= 0)
return off + i;
i = size - ~i;
off += i;
remaining -= i;
}
return ~remaining;
}
@Override
public final ByteBuffer asByteBuffer() {
checkArraySize("ByteBuffer", 1);
@ -314,7 +237,7 @@ public abstract sealed class AbstractMemorySegmentImpl
}
@ForceInline
private boolean overlaps(AbstractMemorySegmentImpl that) {
boolean overlaps(AbstractMemorySegmentImpl that) {
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
final long thisStart = this.unsafeGetOffset();
final long thatStart = that.unsafeGetOffset();
@ -334,7 +257,8 @@ public abstract sealed class AbstractMemorySegmentImpl
@Override
public long mismatch(MemorySegment other) {
Objects.requireNonNull(other);
return MemorySegment.mismatch(this, 0, byteSize(), other, 0, other.byteSize());
return SegmentBulkOperations.mismatch(this, 0, byteSize(),
(AbstractMemorySegmentImpl) other, 0, other.byteSize());
}
@Override
@ -650,64 +574,6 @@ public abstract sealed class AbstractMemorySegmentImpl
}
}
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
@ForceInline
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
AbstractMemorySegmentImpl dst, long dstOffset,
long size) {
Utils.checkNonNegativeIndex(size, "size");
// Implicit null check for src and dst
src.checkAccess(srcOffset, size, true);
dst.checkAccess(dstOffset, size, false);
if (size <= 0) {
// Do nothing
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
// 0 < size < FILL_NATIVE_LIMIT : 0...0X...XXXX
//
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size) but
// this is a bit slower and it likely very unusual there is any difference in the outcome. Also, if there
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
// 0...0X...X000
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
int offset = 0;
for (; offset < limit; offset += 8) {
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
int remaining = (int) size - offset;
// 0...0X00
if (remaining >= 4) {
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
offset += 2;
remaining -=2;
}
// 0...000X
if (remaining == 1) {
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
// We have now fully handled 0...0X...XXXX
} else {
// For larger sizes, the transition to native code pays off
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
}
}
@ForceInline
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
@ -794,40 +660,6 @@ public abstract sealed class AbstractMemorySegmentImpl
}
}
public static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
AbstractMemorySegmentImpl srcImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment);
AbstractMemorySegmentImpl dstImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment);
long srcBytes = srcToOffset - srcFromOffset;
long dstBytes = dstToOffset - dstFromOffset;
srcImpl.checkAccess(srcFromOffset, srcBytes, true);
dstImpl.checkAccess(dstFromOffset, dstBytes, true);
long bytes = Math.min(srcBytes, dstBytes);
long i = 0;
if (bytes > 7) {
if (srcImpl.get(JAVA_BYTE, srcFromOffset) != dstImpl.get(JAVA_BYTE, dstFromOffset)) {
return 0;
}
i = AbstractMemorySegmentImpl.vectorizedMismatchLargeForBytes(srcImpl.sessionImpl(), dstImpl.sessionImpl(),
srcImpl.unsafeGetBase(), srcImpl.unsafeGetOffset() + srcFromOffset,
dstImpl.unsafeGetBase(), dstImpl.unsafeGetOffset() + dstFromOffset,
bytes);
if (i >= 0) {
return i;
}
long remaining = ~i;
assert remaining < 8 : "remaining greater than 7: " + remaining;
i = bytes - remaining;
}
for (; i < bytes; i++) {
if (srcImpl.get(JAVA_BYTE, srcFromOffset + i) != dstImpl.get(JAVA_BYTE, dstFromOffset + i)) {
return i;
}
}
return srcBytes != dstBytes ? bytes : -1;
}
private static int getScaleFactor(Buffer buffer) {
return switch (buffer) {
case ByteBuffer _ -> 0;

View File

@ -0,0 +1,316 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package jdk.internal.foreign;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.util.Architecture;
import jdk.internal.util.ArraysSupport;
import jdk.internal.vm.annotation.ForceInline;
import java.lang.foreign.MemorySegment;
/**
* This class contains optimized bulk operation methods that operate on one or several
* memory segments.
* <p>
* Generally, the methods attempt to work with as-large-as-possible units of memory at
* a time.
* <p>
* It should be noted that when invoking scoped memory access get/set operations, it
* is imperative from a performance perspective to convey the sharp types from the
* call site in order for the compiler to pick the correct Unsafe access variant.
*/
public final class SegmentBulkOperations {
private SegmentBulkOperations() {}
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
// All the threshold values below MUST be a power of two and should preferably be
// greater or equal to 2^3.
// Update the FILL value for Aarch64 once 8338975 is fixed.
private static final int NATIVE_THRESHOLD_FILL = powerOfPropertyOr("fill", Architecture.isAARCH64() ? 10 : 5);
private static final int NATIVE_THRESHOLD_MISMATCH = powerOfPropertyOr("mismatch", 6);
private static final int NATIVE_THRESHOLD_COPY = powerOfPropertyOr("copy", 6);
/**
 * Sets every byte of {@code dst} to {@code value}.
 * <p>
 * Three cases are distinguished on {@code dst.length}:
 * zero (only the segment state is checked), below {@code NATIVE_THRESHOLD_FILL}
 * (the bytes are written from Java in units of 8, 4, 2 and 1 bytes) and everything
 * else (delegated to {@code ScopedMemoryAccess.setMemory}).
 *
 * @param dst   the segment to fill
 * @param value the byte value to write to every position of {@code dst}
 * @return {@code dst}
 */
@ForceInline
public static MemorySegment fill(AbstractMemorySegmentImpl dst, byte value) {
// NOTE(review): presumably rejects read-only segments -- confirm in AbstractMemorySegmentImpl
dst.checkReadOnly(false);
if (dst.length == 0) {
// Implicit state check
dst.checkValidState();
} else if (dst.length < NATIVE_THRESHOLD_FILL) {
// 0 < length < NATIVE_THRESHOLD_FILL : 0...0X...XXXX
// Handle smaller segments directly without transitioning to native code
final long u = Byte.toUnsignedLong(value);
// Replicate the byte into all eight byte lanes of a long. Because every lane is
// identical, the byte order used for the wider writes below does not affect the result.
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
int offset = 0;
// 0...0X...X000
// length is below the (power of two) threshold, so the mask keeps the part of
// length that is a multiple of eight.
final int limit = (int) (dst.length & (NATIVE_THRESHOLD_FILL - 8));
for (; offset < limit; offset += 8) {
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, longValue, !Architecture.isLittleEndian());
}
int remaining = (int) dst.length - limit;
// 0...0X00
if (remaining >= 4) {
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (int) longValue, !Architecture.isLittleEndian());
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, (short) longValue, !Architecture.isLittleEndian());
offset += 2;
remaining -= 2;
}
// 0...000X
if (remaining == 1) {
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + offset, value);
}
// We have now fully handled 0...0X...XXXX
} else {
// Handle larger segments via native calls
SCOPED_MEMORY_ACCESS.setMemory(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset(), dst.length, value);
}
return dst;
}
/**
 * Copies {@code size} bytes from {@code src} (starting at {@code srcOffset}) to
 * {@code dst} (starting at {@code dstOffset}).
 * <p>
 * Sizes below {@code NATIVE_THRESHOLD_COPY} are copied from Java in units of
 * 8, 4, 2 and 1 bytes, provided the two segments do not overlap; all other
 * cases are delegated to {@code ScopedMemoryAccess.copyMemory}.
 *
 * @param src       the source segment
 * @param srcOffset the offset, in bytes, into {@code src} at which reading starts
 * @param dst       the destination segment
 * @param dstOffset the offset, in bytes, into {@code dst} at which writing starts
 * @param size      the number of bytes to copy
 */
@ForceInline
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
AbstractMemorySegmentImpl dst, long dstOffset,
long size) {
// NOTE(review): presumably throws for a negative size -- confirm in Utils
Utils.checkNonNegativeIndex(size, "size");
// Implicit null check for src and dst
// The last argument is true for the source and false for the destination
// (NOTE(review): looks like a "read-only access" flag -- confirm in checkAccess).
src.checkAccess(srcOffset, size, true);
dst.checkAccess(dstOffset, size, false);
if (size <= 0) {
// Do nothing
} else if (size < NATIVE_THRESHOLD_COPY && !src.overlaps(dst)) {
// 0 < size < NATIVE_THRESHOLD_COPY : 0...0X...XXXX
//
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size)) but
// this is a bit slower and it is likely very unusual that there is any difference in the outcome. Also, if
// there is an overlap, we could tolerate one particular direction of overlap (but not the other).
// 0...0X...X000
// size is below the (power of two) threshold, so the mask keeps the part of
// size that is a multiple of eight.
final int limit = (int) (size & (NATIVE_THRESHOLD_COPY - 8));
int offset = 0;
for (; offset < limit; offset += 8) {
final long v = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
SCOPED_MEMORY_ACCESS.putLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
}
int remaining = (int) size - offset;
// 0...0X00
if (remaining >= 4) {
final int v = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
SCOPED_MEMORY_ACCESS.putIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
offset += 4;
remaining -= 4;
}
// 0...00X0
if (remaining >= 2) {
final short v = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset, !Architecture.isLittleEndian());
SCOPED_MEMORY_ACCESS.putShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v, !Architecture.isLittleEndian());
offset += 2;
remaining -=2;
}
// 0...000X
if (remaining == 1) {
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
}
// We have now fully handled 0...0X...XXXX
} else {
// For larger sizes (or possibly overlapping segments), the transition to native code pays off
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
}
}
/**
 * Finds the first position at which two segment ranges differ.
 * <p>
 * The ranges are {@code [srcFromOffset, srcToOffset)} in {@code src} and
 * {@code [dstFromOffset, dstToOffset)} in {@code dst}. Only the common prefix
 * (the shorter of the two ranges) is compared.
 *
 * @param src           the first segment
 * @param srcFromOffset start (inclusive) of the range in {@code src}
 * @param srcToOffset   end (exclusive) of the range in {@code src}
 * @param dst           the second segment
 * @param dstFromOffset start (inclusive) of the range in {@code dst}
 * @param dstToOffset   end (exclusive) of the range in {@code dst}
 * @return the index, relative to the range starts, of the first differing byte;
 *         the length of the shorter range if the common prefix is identical but
 *         the ranges differ in length; or {@code -1} if there is no mismatch
 */
@ForceInline
public static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset, long srcToOffset,
                            AbstractMemorySegmentImpl dst, long dstFromOffset, long dstToOffset) {
    final long srcBytes = srcToOffset - srcFromOffset;
    final long dstBytes = dstToOffset - dstFromOffset;
    src.checkAccess(srcFromOffset, srcBytes, true);
    dst.checkAccess(dstFromOffset, dstBytes, true);

    final long common = Math.min(srcBytes, dstBytes);
    final boolean sizesDiffer = srcBytes != dstBytes;

    // Nothing to compare: the outcome depends solely on the range lengths
    if (common == 0) {
        return sizesDiffer ? 0 : -1;
    }

    // Short ranges are compared directly from Java
    if (common < NATIVE_THRESHOLD_MISMATCH) {
        return mismatch(src, srcFromOffset, dst, dstFromOffset, 0, (int) common, sizesDiffer);
    }

    // Cheap early exit before entering the vectorized path
    final byte srcFirst = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset);
    final byte dstFirst = SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset);
    if (srcFirst != dstFirst) {
        return 0;
    }

    final long outcome = vectorizedMismatchLargeForBytes(src.sessionImpl(), dst.sessionImpl(),
            src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset,
            dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset,
            common);
    if (outcome >= 0) {
        // A mismatch was located by the vectorized pass
        return outcome;
    }

    // A negative outcome is the bitwise complement of the number of trailing bytes
    // that were left unchecked; finish those from Java.
    final long tail = ~outcome;
    assert tail < 8 : "remaining greater than 7: " + tail;
    final long checked = common - tail;
    return mismatch(src, srcFromOffset + checked, dst, dstFromOffset + checked, checked, (int) tail, sizesDiffer);
}
/**
 * Compares {@code length} bytes of {@code src} and {@code dst} from Java code.
 * <p>
 * Mismatch is handled in chunks of 64 (unroll of eight 8s), 8, 4, 2, and 1 byte(s).
 *
 * @param src                  the first segment
 * @param srcFromOffset        offset into {@code src} of the first byte to compare
 * @param dst                  the second segment
 * @param dstFromOffset        offset into {@code dst} of the first byte to compare
 * @param start                number of bytes already compared by the caller; added to
 *                             every returned index
 * @param length               number of bytes to compare
 * @param srcAndDstBytesDiffer whether the two original ranges have different lengths
 * @return {@code start} plus the index of the first differing byte; {@code start + length}
 *         if all bytes are equal but {@code srcAndDstBytesDiffer} is set; otherwise {@code -1}
 */
@ForceInline
private static long mismatch(AbstractMemorySegmentImpl src, long srcFromOffset,
                             AbstractMemorySegmentImpl dst, long dstFromOffset,
                             long start, int length, boolean srcAndDstBytesDiffer) {
    int offset = 0;
    // The mask must never have any of its three lowest bits set. If the threshold is
    // configured below 8 (e.g. via the "mismatch" power property set to 0), then
    // "threshold - 8" is negative and would let a tail of 1..7 bytes produce a non-zero
    // limit, making the loop below read a full long past the end of the range.
    // For thresholds >= 8, the clamped expression is identical to "threshold - 8".
    final int limit = length & (Math.max(NATIVE_THRESHOLD_MISMATCH, 8) - 8);
    for (; offset < limit; offset += 8) {
        final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
    }
    int remaining = length - offset;
    // 0...XXX000
    for (; remaining >= 8; remaining -= 8) {
        final long s = SCOPED_MEMORY_ACCESS.getLongUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final long d = SCOPED_MEMORY_ACCESS.getLongUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 8;
    }
    // 0...0X00
    if (remaining >= 4) {
        final int s = SCOPED_MEMORY_ACCESS.getIntUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final int d = SCOPED_MEMORY_ACCESS.getIntUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 4;
        remaining -= 4;
    }
    // 0...00X0
    if (remaining >= 2) {
        final short s = SCOPED_MEMORY_ACCESS.getShortUnaligned(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset, !Architecture.isLittleEndian());
        final short d = SCOPED_MEMORY_ACCESS.getShortUnaligned(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset, !Architecture.isLittleEndian());
        if (s != d) {
            return start + offset + mismatch(s, d);
        }
        offset += 2;
        remaining -= 2;
    }
    // 0...000X
    if (remaining == 1) {
        final byte s = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcFromOffset + offset);
        final byte d = SCOPED_MEMORY_ACCESS.getByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstFromOffset + offset);
        if (s != d) {
            return start + offset;
        }
    }
    // We have now fully handled 0...0X...XXXX without finding a differing byte
    return srcAndDstBytesDiffer ? (start + length) : -1;
}
/**
 * Returns the index (0..7), in memory order, of the first byte in which two
 * differing longs disagree. The values are expected to have been read in the
 * platform's native byte order and to be different from each other.
 */
@ForceInline
private static int mismatch(long first, long second) {
    final long diff = first ^ second;
    final int equalBits;
    if (Architecture.isLittleEndian()) {
        // The byte at the lowest address is the least significant one
        equalBits = Long.numberOfTrailingZeros(diff);
    } else {
        // The byte at the lowest address is the most significant one
        equalBits = Long.numberOfLeadingZeros(diff);
    }
    return equalBits / 8;
}
/**
 * Returns the index (0..3), in memory order, of the first byte in which two
 * differing ints disagree. The values are expected to have been read in the
 * platform's native byte order and to be different from each other.
 */
@ForceInline
private static int mismatch(int first, int second) {
    final int diff = first ^ second;
    final int equalBits;
    if (Architecture.isLittleEndian()) {
        // The byte at the lowest address is the least significant one
        equalBits = Integer.numberOfTrailingZeros(diff);
    } else {
        // The byte at the lowest address is the most significant one
        equalBits = Integer.numberOfLeadingZeros(diff);
    }
    return equalBits / 8;
}
/**
 * Returns the index (0 or 1), in memory order, of the first byte in which two
 * differing shorts disagree. The values are expected to have been read in the
 * platform's native byte order and to be different from each other.
 * <p>
 * The byte that sits first in memory is the low byte on little-endian platforms
 * and the high byte on big-endian platforms, so that is the byte that must be
 * inspected: if it is equal in both values, the mismatch is at index 1,
 * otherwise it is at index 0. (Inspecting the second byte instead would report
 * index 1 whenever both bytes differ.)
 */
@ForceInline
private static int mismatch(short first, short second) {
    if (Architecture.isLittleEndian()) {
        // First byte in memory == low byte
        return ((0xff & first) == (0xff & second)) ? 1 : 0;
    } else {
        // First byte in memory == high byte
        return ((0xff00 & first) == (0xff00 & second)) ? 1 : 0;
    }
}
/**
 * Mismatch over long lengths.
 * <p>
 * The range is processed in sub-ranges of at most {@code Integer.MAX_VALUE} bytes,
 * each handed to {@code ScopedMemoryAccess.vectorizedMismatch}.
 *
 * @return the index of the first mismatch if one was found (a non-negative value);
 *         otherwise the bitwise complement of the number of trailing bytes that
 *         were not examined
 */
private static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
                                                    Object a, long aOffset,
                                                    Object b, long bOffset,
                                                    long length) {
    long consumed = 0;
    long left = length;
    boolean finalChunk = false;
    while (left > 7 && !finalChunk) {
        // Everything that still fits in an int is handled as the last sub-range
        finalChunk = left <= Integer.MAX_VALUE;
        final int chunk = finalChunk ? (int) left : Integer.MAX_VALUE;
        final int result = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
                a, aOffset + consumed,
                b, bOffset + consumed,
                chunk, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
        if (result >= 0) {
            return consumed + result;
        }
        // A negative result is the complement of the bytes left unchecked in this chunk
        final int examined = chunk - ~result;
        consumed += examined;
        left -= examined;
    }
    return ~left;
}
static final String PROPERTY_PATH = "java.lang.foreign.native.threshold.power.";

/**
 * Reads the integer system property {@code PROPERTY_PATH + name} as a power of two
 * exponent, falling back to {@code defaultPower} if the property is not set to an
 * integer. The exponent is clamped to {@code [0, 30]}, so the returned value is a
 * power of two in the interval [2^0, 2^30].
 */
static int powerOfPropertyOr(String name, int defaultPower) {
    final String key = PROPERTY_PATH + name;
    final int requested = Integer.getInteger(key, defaultPower);
    final int exponent = Math.clamp(requested, 0, Integer.SIZE - 2);
    return 1 << exponent;
}
}

View File

@ -29,7 +29,9 @@
import java.lang.foreign.Arena;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.concurrent.atomic.AtomicReference;
import java.lang.foreign.MemorySegment;
@ -122,6 +124,68 @@ public class TestMismatch {
}
}
// Randomized check of MemorySegment::mismatch (in both directions) for src sizes 1..63,
// with a dst that shares src's content and is zero to eight bytes longer. In roughly half
// of the rounds a run of bytes in dst is altered and the expected mismatch index is the
// start of that run. The Random is seeded with a constant, so the sequence of draws is
// reproducible; do not reorder the calls on rnd.
@Test
public void random() {
try (var arena = Arena.ofConfined()) {
var rnd = new Random(42);
for (int size = 1; size < 64; size++) {
// Repeat a fair number of rounds
for (int i = 0; i < 147; i++) {
var src = arena.allocate(size);
// The dst segment might be zero to eight bytes longer
var dst = arena.allocate(size + rnd.nextInt(8 + 1));
// Fill the src with random data
for (int j = 0; j < size; j++) {
src.set(ValueLayout.JAVA_BYTE, j, randomByte(rnd));
}
// Copy the random data from src to dst
dst.copyFrom(src);
// Fill the rest (if any) of the dst with random data
for (long j = src.byteSize(); j < dst.byteSize(); j++) {
dst.set(ValueLayout.JAVA_BYTE, j, randomByte(rnd));
}
if (rnd.nextBoolean()) {
// In this branch, we inject one or more deviating bytes
// in the inclusive index range [beginDiff, endDiff]
int beginDiff = rnd.nextInt(size);
int endDiff = rnd.nextInt(beginDiff, size);
for (int d = beginDiff; d <= endDiff; d++) {
byte existing = dst.get(ValueLayout.JAVA_BYTE, d);
// Make sure we never get back the same value
byte mutatedValue;
do {
mutatedValue = randomByte(rnd);
} while (existing == mutatedValue);
dst.set(ValueLayout.JAVA_BYTE, d, mutatedValue);
}
// They are not equal and first differ at position beginDiff
assertEquals(src.mismatch(dst), beginDiff);
assertEquals(dst.mismatch(src), beginDiff);
} else {
// In this branch, there is no injection
if (src.byteSize() == dst.byteSize()) {
// The content matches and they are of equal size
assertEquals(src.mismatch(dst), -1);
assertEquals(dst.mismatch(src), -1);
} else {
// The content matches but they are of different length.
// Remember, the size of src is always smaller than or equal
// to the size of dst, so the mismatch is at the end of src.
assertEquals(src.mismatch(dst), src.byteSize());
assertEquals(dst.mismatch(src), src.byteSize());
}
}
}
}
}
}
// Draws one value from rnd, uniformly over the full byte range [Byte.MIN_VALUE, Byte.MAX_VALUE]
static byte randomByte(Random rnd) {
    // The upper bound of nextInt is exclusive, hence the + 1
    final int drawn = rnd.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1);
    return (byte) drawn;
}
@Test(dataProvider = "slices")
public void testDifferentValues(MemorySegment s1, MemorySegment s2) {
out.format("testDifferentValues s1:%s, s2:%s\n", s1, s2);

View File

@ -41,21 +41,16 @@ import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.util.concurrent.TimeUnit;
import static java.lang.foreign.ValueLayout.*;
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
public class CopyTest {
public class SegmentBulkCopy {
@Param({"0", "1", "2", "3", "4", "5", "6", "7", "8",
"9", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30", "31", "32",
"33", "36", "40", "44", "48", "52", "56", "60", "63", "64", "128"})
@Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
"4096", "32768", "262144", "2097152", "16777216", "134217728"})
public int ELEM_SIZE;
byte[] srcArray;
@ -80,28 +75,37 @@ public class CopyTest {
}
@Benchmark
public void array_copy() {
public void arrayCopy() {
System.arraycopy(srcArray, 0, dstArray, 0, ELEM_SIZE);
}
@Benchmark
public void heap_segment_copy5Arg() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Benchmark
public void native_segment_copy5Arg() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
@Benchmark
public void heap_segment_copy7arg() {
MemorySegment.copy(heapSrcSegment, JAVA_BYTE, 0, heapDstSegment, JAVA_BYTE, 0, ELEM_SIZE);
}
@Benchmark
public void buffer_copy() {
public void bufferCopy() {
dstBuffer.put(srcBuffer);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
@Benchmark
public void heapSegmentCopyJava() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
@Benchmark
public void heapSegmentCopyUnsafe() {
MemorySegment.copy(heapSrcSegment, 0, heapDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=31"})
@Benchmark
public void nativeSegmentCopyJava() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.copy=0"})
@Benchmark
public void nativeSegmentCopyUnsafe() {
MemorySegment.copy(nativeSrcSegment, 0, nativeDstSegment, 0, ELEM_SIZE);
}
}

View File

@ -48,13 +48,10 @@ import java.util.concurrent.TimeUnit;
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
public class TestFill {
public class SegmentBulkFill {
@Param({"0", "1", "2", "3", "4", "5", "6", "7",
"8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23",
"24", "25", "26", "27", "28", "29", "30", "31",
"32", "128", "256", "384", "511", "512"})
@Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
"4096", "32768", "262144", "2097152", "16777216", "134217728"})
public int ELEM_SIZE;
byte[] array;
@ -73,22 +70,43 @@ public class TestFill {
}
@Benchmark
public void arrays_fill() {
public void arraysFill() {
Arrays.fill(array, (byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark
public void heap_segment_fill() {
public void heapSegmentFillJava() {
heapSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark
public void native_segment_fill() {
public void heapSegmentFillUnsafe() {
heapSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark
public void nativeSegmentFillJava() {
nativeSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark
public void unaligned_segment_fill() {
public void nativeSegmentFillUnsafe() {
nativeSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=31"})
@Benchmark
public void unalignedSegmentFillJava() {
unalignedSegment.fill((byte) 0);
}
@Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.fill=0"})
@Benchmark
public void unalignedSegmentFillUnsafe() {
unalignedSegment.fill((byte) 0);
}

View File

@ -0,0 +1,112 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package org.openjdk.bench.java.lang.foreign;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import static java.lang.foreign.ValueLayout.*;
/**
 * Benchmarks {@code MemorySegment::mismatch} on native and heap segments of equal
 * content (so the whole segment is always scanned) and compares it with
 * {@code Arrays::mismatch} on byte arrays holding the same data.
 * <p>
 * The {@code ...Java} and {@code ...Unsafe} variants run the same code in separate
 * forks that differ only in the value of the system property
 * {@code java.lang.foreign.native.threshold.power.mismatch} (31 and 0, respectively).
 */
@BenchmarkMode(Mode.AverageTime)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@State(Scope.Thread)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Fork(value = 3)
public class SegmentBulkMismatch {

    // Size, in bytes, of every segment and array under test
    @Param({"2", "3", "4", "5", "6", "7", "8", "64", "512",
            "4096", "32768", "262144", "2097152", "16777216", "134217728"})
    public int ELEM_SIZE;

    MemorySegment srcNative;
    MemorySegment dstNative;
    byte[] srcArray;
    byte[] dstArray;
    MemorySegment srcHeap;
    MemorySegment dstHeap;

    @Setup
    public void setup() {
        // Always use the same alignment regardless of size
        srcNative = Arena.ofAuto().allocate(ELEM_SIZE, 16);
        dstNative = Arena.ofAuto().allocate(ELEM_SIZE, 16);
        var rnd = new Random(42);
        for (int i = 0; i < ELEM_SIZE; i++) {
            // The upper bound of nextInt is exclusive; + 1 so that Byte.MAX_VALUE can be drawn too
            srcNative.set(JAVA_BYTE, i, (byte) rnd.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1));
        }
        // All sources and destinations hold identical content
        dstNative.copyFrom(srcNative);
        srcArray = srcNative.toArray(JAVA_BYTE);
        dstArray = dstNative.toArray(JAVA_BYTE);
        srcHeap = MemorySegment.ofArray(srcArray);
        dstHeap = MemorySegment.ofArray(dstArray);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
    @Benchmark
    public long nativeSegmentJava() {
        return srcNative.mismatch(dstNative);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=31"})
    @Benchmark
    public long heapSegmentJava() {
        return srcHeap.mismatch(dstHeap);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
    @Benchmark
    public long nativeSegmentUnsafe() {
        return srcNative.mismatch(dstNative);
    }

    @Fork(value = 3, jvmArgsAppend = {"-Djava.lang.foreign.native.threshold.power.mismatch=0"})
    @Benchmark
    public long heapSegmentUnsafe() {
        return srcHeap.mismatch(dstHeap);
    }

    @Benchmark
    public long array() {
        return Arrays.mismatch(srcArray, dstArray);
    }

}