8338967: Improve performance for MemorySegment::fill

Reviewed-by: mcimadamore, psandoz
This commit is contained in:
Per Minborg 2024-09-03 10:25:27 +00:00
parent 633fad8e53
commit 7a418fc074
3 changed files with 277 additions and 3 deletions

View File

@ -51,6 +51,7 @@ import jdk.internal.access.foreign.UnmapperProxy;
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.reflect.CallerSensitive;
import jdk.internal.reflect.Reflection;
import jdk.internal.util.Architecture;
import jdk.internal.util.ArraysSupport;
import jdk.internal.util.Preconditions;
import jdk.internal.vm.annotation.ForceInline;
@ -188,10 +189,52 @@ public abstract sealed class AbstractMemorySegmentImpl
return StreamSupport.stream(spliterator(elementLayout), false);
}
// Size limit, in bytes, used by fill(byte) to pick its strategy: segments shorter than
// this are filled with plain 8/4/2/1-byte scoped stores, while segments of this size or
// larger are handed to SCOPED_MEMORY_ACCESS.setMemory.
// FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3, because
// fill(byte) masks the length with (FILL_NATIVE_THRESHOLD - 8) to find how many bytes
// can be written as whole 8-byte words.
// The value is 2^10 on AArch64 and 2^5 on every other architecture.
// Update the value for Aarch64 once 8338975 is fixed.
private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 10 : 5);
@Override
public final MemorySegment fill(byte value){
checkAccess(0, length, false);
SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
@ForceInline
public final MemorySegment fill(byte value) {
    // Writes `value` into every byte of this segment and returns this segment.
    // Three cases, chosen by `length`:
    //   length == 0                      -> nothing to write, only the state check runs
    //   length >= FILL_NATIVE_THRESHOLD  -> bulk SCOPED_MEMORY_ACCESS.setMemory call
    //   otherwise                        -> plain 8/4/2/1-byte scoped stores
    checkReadOnly(false);

    if (length == 0) {
        // No store will perform the state check for us, so do it explicitly
        checkValidState();
        return this;
    }

    if (length >= FILL_NATIVE_THRESHOLD) {
        // Larger segments are filled through setMemory
        SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
        return this;
    }

    // From here on: 0 < length < FILL_NATIVE_THRESHOLD.
    // Copy the (unsigned) byte into each of the eight byte lanes of a long.
    final long unsignedByte = Byte.toUnsignedLong(value);
    final long pattern = unsignedByte * 0x0101_0101_0101_0101L;

    // Number of bytes that can be written as whole 8-byte words. Because the threshold
    // is a power of two, this mask simply clears the three lowest bits of length.
    final int wordBytes = (int) (length & (FILL_NATIVE_THRESHOLD - 8));
    int pos = 0;
    while (pos < wordBytes) {
        SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + pos, pattern);
        pos += 8;
    }

    // Whatever is left (fewer than 8 bytes) is written as at most one int,
    // one short and one byte, in that order.
    int tail = (int) length - wordBytes;
    if (tail >= 4) {
        SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + pos, (int) pattern);
        pos += 4;
        tail -= 4;
    }
    if (tail >= 2) {
        SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + pos, (short) pattern);
        pos += 2;
        tail -= 2;
    }
    if (tail == 1) {
        SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + pos, value);
    }
    return this;
}

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @summary Test MemorySegment::fill
* @run junit TestFill
*/
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;
import java.lang.foreign.Arena;
import java.lang.foreign.ValueLayout;
import java.util.Arrays;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Tests for {@code MemorySegment::fill}.
 *
 * <p>Most tests are parameterized over {@link #sizes()}, which ranges from empty segments
 * up to {@link #MAX_SIZE} bytes, so that the same expectation is checked for every
 * segment length in that list.
 */
final class TestFill {

    // Make sure negative values are treated as expected
    private static final byte VALUE = -71;

    // Largest segment length exercised by the size-parameterized tests
    private static final int MAX_SIZE = 1 << 10;

    /**
     * Fills a slice in the middle of a larger segment and verifies that exactly the
     * slice, and nothing before or after it, was written.
     *
     * @param len length in bytes of the slice that is filled
     */
    @ParameterizedTest
    @MethodSource("sizes")
    void testFill(int len) {
        int offset = 16;
        int expandedLen = offset + MAX_SIZE + offset;

        // Make sure fill only affects the intended region XXXXXX
        //
        // ................XXXXXX................
        // |    offset    | len |    offset     |
        try (var arena = Arena.ofConfined()) {
            var segment = arena.allocate(expandedLen);
            var slice = segment.asSlice(offset, len);
            slice.fill(VALUE);

            var expected = new byte[expandedLen];
            Arrays.fill(expected, offset, offset + len, VALUE);

            // This checks the actual fill region as well as potential under and overflows
            assertArrayEquals(expected, segment.toArray(ValueLayout.JAVA_BYTE));
        }
    }

    /**
     * Fills a 15-byte segment with the given value and verifies every byte reads back
     * as that value.
     *
     * @param value the fill value, drawn from the full {@code byte} range by {@link #values()}
     */
    @ParameterizedTest
    @MethodSource("values")
    void testValues(int value) {
        // 15 bytes: one 8-byte, one 4-byte, one 2-byte and one 1-byte chunk
        int size = 0b1111;
        try (var arena = Arena.ofConfined()) {
            var segment = arena.allocate(size);
            segment.fill((byte) value);
            assertTrue(segment.elements(ValueLayout.JAVA_BYTE)
                    .map(s -> s.get(ValueLayout.JAVA_BYTE, 0))
                    .allMatch(v -> v == value), "Failed to fill with value " + value);
        }
    }

    /**
     * Verifies that filling a read-only segment is rejected with
     * {@link IllegalArgumentException}, for every segment length.
     *
     * @param len length in bytes of the read-only segment
     */
    @ParameterizedTest
    @MethodSource("sizes")
    void testReadOnly(int len) {
        try (var arena = Arena.ofConfined()) {
            // Allocate 'len' bytes (not a fixed size) so that each parameterized run
            // really exercises a different segment length.
            var segment = arena.allocate(len).asReadOnly();
            assertThrows(IllegalArgumentException.class, () -> segment.fill(VALUE));
        }
    }

    /**
     * Verifies that filling a confined segment from a thread other than the one that
     * created its arena fails with {@link WrongThreadException}, for every segment length.
     *
     * @param len length in bytes of the confined segment
     */
    @ParameterizedTest
    @MethodSource("sizes")
    void testConfinement(int len) {
        try (var arena = Arena.ofConfined()) {
            // Allocate 'len' bytes (not a fixed size) so that each parameterized run
            // really exercises a different segment length.
            var segment = arena.allocate(len);
            AtomicReference<RuntimeException> ex = new AtomicReference<>();
            // Run the fill on another thread and hand the resulting exception back
            CompletableFuture<Void> future = CompletableFuture.runAsync(() -> {
                try {
                    segment.fill(VALUE);
                } catch (RuntimeException e) {
                    ex.set(e);
                }
            });
            future.join();
            assertInstanceOf(WrongThreadException.class, ex.get());
        }
    }

    /**
     * Verifies that filling a segment whose arena has already been closed fails with
     * {@link IllegalStateException}, for every segment length.
     *
     * @param len length in bytes of the segment
     */
    @ParameterizedTest
    @MethodSource("sizes")
    void testScope(int len) {
        // Deliberately not try-with-resources: the segment must outlive its arena
        var arena = Arena.ofConfined();
        var segment = arena.allocate(len);
        arena.close();
        assertThrows(IllegalStateException.class, () -> segment.fill(VALUE));
    }

    /** Segment lengths, in bytes, used by the size-parameterized tests. */
    private static Stream<Arguments> sizes() {
        return IntStream.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 23, 32, 63, 128, 256, 511, MAX_SIZE)
                .boxed()
                .map(Arguments::of);
    }

    /** Every possible {@code byte} value, widened to {@code int}. */
    private static Stream<Arguments> values() {
        return IntStream.rangeClosed(Byte.MIN_VALUE, Byte.MAX_VALUE)
                .boxed()
                .map(Arguments::of);
    }
}

View File

@ -0,0 +1,95 @@
/*
* Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package org.openjdk.bench.java.lang.foreign;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.concurrent.TimeUnit;
/**
 * JMH benchmark measuring the average time, in nanoseconds, needed to zero-fill
 * {@link #ELEM_SIZE} bytes through {@code Arrays.fill} and through
 * {@code MemorySegment.fill} on a heap segment, a native segment and a native
 * segment sliced one byte past its 8-byte-aligned start.
 */
@State(Scope.Thread)
@Fork(3)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
public class TestFill {

    // Byte written by every benchmark method
    private static final byte FILL_VALUE = (byte) 0;

    // Number of bytes filled per invocation; injected by JMH for each listed value
    @Param({"0", "1", "2", "3", "4", "5", "6", "7",
            "8", "9", "10", "11", "12", "13", "14", "15",
            "16", "17", "18", "19", "20", "21", "22", "23",
            "24", "25", "26", "27", "28", "29", "30", "31",
            "32", "128", "256", "384", "511", "512"})
    public int ELEM_SIZE;

    byte[] array;
    MemorySegment heapSegment;
    MemorySegment nativeSegment;
    MemorySegment unalignedSegment;
    ByteBuffer buffer;

    /** Allocates the fill targets, each {@link #ELEM_SIZE} bytes long. */
    @Setup
    public void setup() {
        array = new byte[ELEM_SIZE];
        // Heap segment and buffer are both views over the same backing array
        heapSegment = MemorySegment.ofArray(array);
        buffer = ByteBuffer.wrap(array);
        nativeSegment = Arena.ofAuto().allocate(ELEM_SIZE, 8);
        // One extra byte is allocated and then sliced off the front, so this segment
        // starts one byte past an 8-byte-aligned address
        unalignedSegment = Arena.ofAuto().allocate(ELEM_SIZE + 1, 8).asSlice(1);
    }

    /** Baseline: fills the plain byte array. */
    @Benchmark
    public void arrays_fill() {
        Arrays.fill(array, FILL_VALUE);
    }

    /** Fills the segment that wraps the byte array. */
    @Benchmark
    public void heap_segment_fill() {
        heapSegment.fill(FILL_VALUE);
    }

    /** Fills the native segment allocated with 8-byte alignment. */
    @Benchmark
    public void native_segment_fill() {
        nativeSegment.fill(FILL_VALUE);
    }

    /** Fills the native segment that starts one byte past an aligned address. */
    @Benchmark
    public void unaligned_segment_fill() {
        unalignedSegment.fill(FILL_VALUE);
    }
}