From 7a418fc07464fe359a0b45b6d797c65c573770cb Mon Sep 17 00:00:00 2001 From: Per Minborg Date: Tue, 3 Sep 2024 10:25:27 +0000 Subject: [PATCH] 8338967: Improve performance for MemorySegment::fill Reviewed-by: mcimadamore, psandoz --- .../foreign/AbstractMemorySegmentImpl.java | 49 ++++++- test/jdk/java/foreign/TestFill.java | 136 ++++++++++++++++++ .../bench/java/lang/foreign/TestFill.java | 95 ++++++++++++ 3 files changed, 277 insertions(+), 3 deletions(-) create mode 100644 test/jdk/java/foreign/TestFill.java create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/TestFill.java diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 75be22ac454..5d43c28a667 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -51,6 +51,7 @@ import jdk.internal.access.foreign.UnmapperProxy; import jdk.internal.misc.ScopedMemoryAccess; import jdk.internal.reflect.CallerSensitive; import jdk.internal.reflect.Reflection; +import jdk.internal.util.Architecture; import jdk.internal.util.ArraysSupport; import jdk.internal.util.Preconditions; import jdk.internal.vm.annotation.ForceInline; @@ -188,10 +189,52 @@ public abstract sealed class AbstractMemorySegmentImpl return StreamSupport.stream(spliterator(elementLayout), false); } + // FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3 + // Update the value for Aarch64 once 8338975 is fixed. + private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 
10 : 5); + @Override - public final MemorySegment fill(byte value){ - checkAccess(0, length, false); - SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value); + @ForceInline + public final MemorySegment fill(byte value) { + checkReadOnly(false); + if (length == 0) { + // Implicit state check + checkValidState(); + } else if (length < FILL_NATIVE_THRESHOLD) { + // 0 <= length < FILL_NATIVE_THRESHOLD : 0...0X...XXXX + + // Handle smaller segments directly without transitioning to native code + final long u = Byte.toUnsignedLong(value); + final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u; + + int offset = 0; + // 0...0X...X000 + final int limit = (int) (length & (FILL_NATIVE_THRESHOLD - 8)); + for (; offset < limit; offset += 8) { + SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, longValue); + } + int remaining = (int) length - limit; + // 0...0X00 + if (remaining >= 4) { + SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (int) longValue); + offset += 4; + remaining -= 4; + } + // 0...00X0 + if (remaining >= 2) { + SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (short) longValue); + offset += 2; + remaining -= 2; + } + // 0...000X + if (remaining == 1) { + SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, value); + } + // We have now fully handled 0...0X...XXXX + } else { + // Handle larger segments via native calls + SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value); + } return this; } diff --git a/test/jdk/java/foreign/TestFill.java b/test/jdk/java/foreign/TestFill.java new file mode 100644 index 00000000000..e5f69587f2f --- /dev/null +++ b/test/jdk/java/foreign/TestFill.java @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +/* + * @test + * @summary Test MemorySegment::fill + * @run junit TestFill + */ + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.lang.foreign.Arena; +import java.lang.foreign.ValueLayout; +import java.util.Arrays; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +import static org.junit.jupiter.api.Assertions.*; + +final class TestFill { + + // Make sure negative values are treated as expected + private static final byte VALUE = -71; + + @ParameterizedTest + @MethodSource("sizes") + void testFill(int len) { + int offset = 16; + int expandedLen = offset + MAX_SIZE + offset; + + // Make sure fill only affects the intended region XXXXXX + // + // ................XXXXXX................ 
+ // | offset | len | offset | + + try (var arena = Arena.ofConfined()) { + var segment = arena.allocate(expandedLen); + var slice = segment.asSlice(offset, len); + slice.fill(VALUE); + + var expected = new byte[expandedLen]; + Arrays.fill(expected, offset, offset + len, VALUE); + + // This checks the actual fill region as well as potential under and overflows + assertArrayEquals(expected, segment.toArray(ValueLayout.JAVA_BYTE)); + } + } + + @ParameterizedTest + @MethodSource("values") + void testValues(int value) { + int size = 0b1111; + try (var arena = Arena.ofConfined()) { + var segment = arena.allocate(size); + segment.fill((byte) value); + assertTrue(segment.elements(ValueLayout.JAVA_BYTE) + .map(s -> s.get(ValueLayout.JAVA_BYTE, 0)) + .allMatch(v -> v == value), "Failed to fill with value " + value); + } + } + + @ParameterizedTest + @MethodSource("sizes") + void testReadOnly(int len) { + try (var arena = Arena.ofConfined()) { + var segment = arena.allocate(10).asReadOnly(); + assertThrows(IllegalArgumentException.class, () -> segment.fill(VALUE)); + } + } + + @ParameterizedTest + @MethodSource("sizes") + void testConfinement(int len) { + try (var arena = Arena.ofConfined()) { + var segment = arena.allocate(10); + AtomicReference ex = new AtomicReference<>(); + CompletableFuture future = CompletableFuture.runAsync(() -> { + try { + segment.fill(VALUE); + } catch (RuntimeException e) { + ex.set(e); + } + }); + future.join(); + assertInstanceOf(WrongThreadException.class, ex.get()); + } + } + + @ParameterizedTest + @MethodSource("sizes") + void testScope(int len) { + var arena = Arena.ofConfined(); + var segment = arena.allocate(len); + arena.close(); + assertThrows(IllegalStateException.class, () -> segment.fill(VALUE)); + } + + private static final int MAX_SIZE = 1 << 10; + + private static Stream sizes() { + return IntStream.of(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 23, 32, 63, 128, 256, 511, MAX_SIZE) + .boxed() + .map(Arguments::of); 
+ } + + private static Stream values() { + return IntStream.rangeClosed(Byte.MIN_VALUE, Byte.MAX_VALUE) + .boxed() + .map(Arguments::of); + } + +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/TestFill.java b/test/micro/org/openjdk/bench/java/lang/foreign/TestFill.java new file mode 100644 index 00000000000..78719f03bc3 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/TestFill.java @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ * + */ + +package org.openjdk.bench.java.lang.foreign; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.util.Arrays; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@State(Scope.Thread) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(value = 3) +public class TestFill { + + @Param({"0", "1", "2", "3", "4", "5", "6", "7", + "8", "9", "10", "11", "12", "13", "14", "15", + "16", "17", "18", "19", "20", "21", "22", "23", + "24", "25", "26", "27", "28", "29", "30", "31", + "32", "128", "256", "384", "511", "512"}) + public int ELEM_SIZE; + + byte[] array; + MemorySegment heapSegment; + MemorySegment nativeSegment; + MemorySegment unalignedSegment; + ByteBuffer buffer; + + @Setup + public void setup() { + array = new byte[ELEM_SIZE]; + heapSegment = MemorySegment.ofArray(array); + nativeSegment = Arena.ofAuto().allocate(ELEM_SIZE, 8); + unalignedSegment = Arena.ofAuto().allocate(ELEM_SIZE + 1, 8).asSlice(1); + buffer = ByteBuffer.wrap(array); + } + + @Benchmark + public void arrays_fill() { + Arrays.fill(array, (byte) 0); + } + + @Benchmark + public void heap_segment_fill() { + heapSegment.fill((byte) 0); + } + + @Benchmark + public void native_segment_fill() { + nativeSegment.fill((byte) 0); + } + + @Benchmark + public void 
unaligned_segment_fill() { + unalignedSegment.fill((byte) 0); + } + +}