/* * Copyright (c) 2021, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. * */ import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; import java.lang.foreign.MemoryLayout; import java.lang.foreign.MemorySegment; import java.lang.foreign.SegmentAllocator; import java.lang.foreign.ValueLayout; import java.lang.invoke.MethodHandle; import java.lang.reflect.Field; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Random; import java.util.function.UnaryOperator; import jdk.internal.foreign.StringSupport; import org.testng.annotations.*; import static java.lang.foreign.ValueLayout.*; import static org.testng.Assert.*; /* * @test * @modules java.base/jdk.internal.foreign * @run testng TestStringEncoding */ public class TestStringEncoding { @Test(dataProvider = "strings") public void testStrings(String testString) { for (Charset charset : Charset.availableCharsets().values()) { if (isStandard(charset)) { for (Arena arena : arenas()) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); int terminatorSize = "\0".getBytes(charset).length; if (charset == StandardCharsets.UTF_16) { terminatorSize -= 2; // drop BOM } // Note that the JDK's UTF_32 encoder doesn't add a BOM. // This is legal under the Unicode standard, and means the byte order is BE. // See: https://unicode.org/faq/utf_bom.html#gen7 int expectedByteLength = testString.getBytes(charset).length + terminatorSize; assertEquals(text.byteSize(), expectedByteLength); String roundTrip = text.getString(0, charset); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, testString); } } } } else { assertThrows(IllegalArgumentException.class, () -> Arena.global().allocateFrom(testString, charset)); } } } @Test(dataProvider = "strings") public void testStringsHeap(String testString) { for (Charset charset : singleByteCharsets()) { for (var arena : arenas()) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); text = toHeapSegment(text); int expectedByteLength = testString.getBytes(charset).length + 1; assertEquals(text.byteSize(), expectedByteLength); String roundTrip = text.getString(0, charset); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, testString); } } } } } MemorySegment toHeapSegment(MemorySegment segment) { var heapArray = segment.toArray(JAVA_BYTE); return MemorySegment.ofArray(heapArray); } @Test(dataProvider = "strings") public void unboundedSegment(String testString) { testModifyingSegment(testString, standardCharsets(), s -> s.reinterpret(Long.MAX_VALUE), UnaryOperator.identity()); } @Test(dataProvider = "strings") public void unalignedSegmentSingleByte(String testString) { testModifyingSegment(testString, singleByteCharsets(), s -> s.byteSize() > 1 ? s.asSlice(1) : s, s -> s.length() > 0 ? s.substring(1) : s); } @Test(dataProvider = "strings") public void expandedSegment(String testString) { try (var arena = Arena.ofConfined()) { for (int i = 0; i < Long.BYTES; i++) { int extra = i; testModifyingSegment(testString, // Single byte charsets standardCharsets(), s -> { var s2 = arena.allocate(s.byteSize() + extra); MemorySegment.copy(s, 0, s2, 0, s.byteSize()); return s2; }, UnaryOperator.identity()); } } } public void testModifyingSegment(String testString, List charsets, UnaryOperator segmentMapper, UnaryOperator stringMapper) { for (var charset : charsets) { try (Arena arena = Arena.ofConfined()) { MemorySegment text = arena.allocateFrom(testString, charset); text = segmentMapper.apply(text); String roundTrip = text.getString(0, charset); String expected = stringMapper.apply(testString); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, expected); } } } } @Test() public void testPeculiarContentSingleByte() { Random random = new Random(42); for (int len = 7; len < 71; len++) { for (var arena : arenas()) { try (arena) { var segment = arena.allocate(len, 1); var arr = new byte[len]; random.nextBytes(arr); segment.copyFrom(MemorySegment.ofArray(arr)); int terminatorIndex = random.nextInt(len); segment.set(ValueLayout.JAVA_BYTE, terminatorIndex, (byte) 0); for (Charset charset : singleByteCharsets()) { var s = segment.getString(0, charset); var ref = referenceImpl(segment, 0, charset); assertEquals(s, ref); } } } } } @Test(dataProvider = "strings") public void testOffset(String testString) { if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { return; } for (var charset : singleByteCharsets()) { for (var arena: arenas()) { try (arena) { MemorySegment inSegment = arena.allocateFrom(testString, charset); for (int i = 0; i < 3; i++) { String actual = inSegment.getString(i, charset); assertEquals(actual, testString.substring(i)); } } } } } private static final MemoryLayout CHAR_POINTER = ADDRESS .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)); private static final Linker LINKER = Linker.nativeLinker(); private static final MethodHandle STRCAT = LINKER.downcallHandle( LINKER.defaultLookup().find("strcat").orElseThrow(), FunctionDescriptor.of(CHAR_POINTER, CHAR_POINTER, CHAR_POINTER)); @Test(dataProvider = "strings") public void nativeSegFromNativeCall(String testString) { String addition = "123"; try (var arena = Arena.ofConfined()) { try { var testStringSegment = arena.allocateFrom(testString); var additionSegment = arena.allocateFrom(addition); var destination = arena.allocate(testStringSegment.byteSize() + additionSegment.byteSize() - 1); destination.copyFrom(testStringSegment); MemorySegment concatenation = (MemorySegment) STRCAT.invokeExact(destination, arena.allocateFrom(addition)); var actual = concatenation.getString(0); assertEquals(actual, testString + addition); } catch (Throwable t) { throw new AssertionError(t); } } } @Test public void segmentationFault() { for (int i = 1; i < 18; i++) { var size = 1 << i; try (var arena = Arena.ofConfined()) { var seg = arena.allocate(size, size); seg.fill((byte) 1); try { var s = seg.getString(0); System.out.println("s.length() = " + s.length()); } catch (IndexOutOfBoundsException e) { // we will end up here if strlen finds a zero outside the MS } } } } private static final int TEST_LENGTH_MAX = 277; private Random deterministicRandom() { return new Random(42); } @Test public void chunked_strlen_byte() { Random random = deterministicRandom(); for (int skew = 0; skew < Long.BYTES; skew++) { for (int len = 0; len < TEST_LENGTH_MAX; len++) { try (var arena = Arena.ofConfined()) { var segment = arena.allocate(len + 1 + skew) .asSlice(skew); for (int i = 0; i < len; i++) { byte value; while ((value = (byte) random.nextInt()) == 0) { } segment.setAtIndex(JAVA_BYTE, i, value); } segment.setAtIndex(JAVA_BYTE, len, (byte) 0); for (int j = 0; j < len; j++) { int actual = StringSupport.chunkedStrlenByte(segment, j); assertEquals(actual, len - j); } } } } } @Test public void chunked_strlen_short() { Random random = deterministicRandom(); for (int skew = 0; skew < Long.BYTES; skew += Short.BYTES) { for (int len = 0; len < TEST_LENGTH_MAX; len++) { try (var arena = Arena.ofConfined()) { var segment = arena.allocate((len + 1) * Short.BYTES + skew, JAVA_SHORT.byteAlignment()) .asSlice(skew); for (int i = 0; i < len; i++) { short value; while ((value = (short) random.nextInt()) == 0) { } segment.setAtIndex(JAVA_SHORT, i, value); } segment.setAtIndex(JAVA_SHORT, len, (short) 0); for (int j = 0; j < len; j++) { int actual = StringSupport.chunkedStrlenShort(segment, j * Short.BYTES); assertEquals(actual, (len - j) * Short.BYTES); } } } } } @Test public void strlen_int() { Random random = deterministicRandom(); for (int skew = 0; skew < Long.BYTES; skew += Integer.BYTES) { for (int len = 0; len < TEST_LENGTH_MAX; len++) { try (var arena = Arena.ofConfined()) { var segment = arena.allocate((len + 1) * Integer.BYTES + skew, JAVA_INT.byteAlignment()) .asSlice(skew); for (int i = 0; i < len; i++) { int value; while ((value = random.nextInt()) == 0) { } segment.setAtIndex(JAVA_INT, i, value); } segment.setAtIndex(JAVA_INT, len, 0); for (int j = 0; j < len; j++) { int actual = StringSupport.strlenInt(segment, j * Integer.BYTES); assertEquals(actual, (len - j) * Integer.BYTES); } } } } } @Test(dataProvider = "charsetsAndSegments") public void testStringGetWithCharset(Charset charset, MemorySegment segment) { for (int offset = 0 ; offset < Long.BYTES ; offset++) { segment.getString(offset, charset); } } @Test(dataProvider = "charsetsAndSegments") public void testStringSetWithCharset(Charset charset, MemorySegment segment) { for (int offset = 0 ; offset < Long.BYTES ; offset++) { segment.setString(offset, "H", charset); } } @Test(dataProvider = "charsetsAndSegments") public void testStringAllocateFromWithCharset(Charset charset, MemorySegment segment) { for (int offset = 0 ; offset < Long.BYTES ; offset++) { SegmentAllocator.prefixAllocator(segment.asSlice(offset)).allocateFrom("H", charset); } } @DataProvider public static Object[][] strings() { return new Object[][]{ {"testing"}, {""}, {"X"}, {"12345"}, {"yen \u00A5"}, {"snowman \u26C4"}, {"rainbow \uD83C\uDF08"}, {"0"}, {"01"}, {"012"}, {"0123"}, {"01234"}, {"012345"}, {"0123456"}, {"01234567"}, {"012345678"}, {"0123456789"} }; } public static boolean containsOnlyRegularCharacters(String s) { return s.chars() .allMatch(c -> Character.isLetterOrDigit((char) c)); } static boolean isStandard(Charset charset) { for (Field standardCharset : StandardCharsets.class.getDeclaredFields()) { try { if (standardCharset.get(null) == charset) { return true; } } catch (ReflectiveOperationException e) { throw new AssertionError(e); } } return false; } static List standardCharsets() { return Charset.availableCharsets().values().stream() .filter(TestStringEncoding::isStandard) .toList(); } List singleByteCharsets() { return Arrays.asList(StandardCharsets.UTF_8, StandardCharsets.ISO_8859_1, StandardCharsets.US_ASCII); } static String referenceImpl(MemorySegment segment, long offset, Charset charset) { long len = strlen_byte(segment, offset); byte[] bytes = new byte[(int) len]; MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, (int) len); return new String(bytes, charset); } // Reference implementation private static int strlen_byte(MemorySegment segment, long start) { // iterate until overflow (String can only hold a byte[], whose length can be expressed as an int) for (int offset = 0; offset >= 0; offset++) { byte curr = segment.get(JAVA_BYTE, start + offset); if (curr == 0) { return offset; } } throw new IllegalArgumentException("String too large"); } private static List arenas() { return Arrays.asList( Arena.ofConfined(), // Native memory new HeapArena(byte.class), // Heap memory backed by a byte array new HeapArena(short.class), // Heap memory backed by a short array new HeapArena(int.class), // Heap memory backed by an int array new HeapArena(long.class)); // Heap memory backed by a long array } private static final class HeapArena implements Arena { private static final int ELEMENT_SIZE = 1_000; private final MemorySegment backingSegment; private final SegmentAllocator allocator; public HeapArena(Class type) { backingSegment = switch (type) { case Class c when byte.class.equals(c) -> MemorySegment.ofArray(new byte[ELEMENT_SIZE]); case Class c when short.class.equals(c) -> MemorySegment.ofArray(new short[ELEMENT_SIZE]); case Class c when int.class.equals(c) -> MemorySegment.ofArray(new int[ELEMENT_SIZE]); case Class c when long.class.equals(c) -> MemorySegment.ofArray(new long[ELEMENT_SIZE]); default -> throw new IllegalArgumentException(type.toString()); }; allocator = SegmentAllocator.slicingAllocator(backingSegment); } @Override public MemorySegment allocate(long byteSize, long byteAlignment) { return allocator.allocate(byteSize, byteAlignment); } @Override public MemorySegment.Scope scope() { return backingSegment.scope(); } @Override public void close() { // Do nothing } @Override public String toString() { return "HeapArena{" + "type=" + backingSegment.heapBase().orElseThrow().getClass().getName() + '}'; } } static MemorySegment[] heapSegments() { return new MemorySegment[]{ MemorySegment.ofArray(new byte[80]), MemorySegment.ofArray(new char[40]), MemorySegment.ofArray(new short[40]), MemorySegment.ofArray(new int[20]), MemorySegment.ofArray(new float[20]), MemorySegment.ofArray(new long[10]), MemorySegment.ofArray(new double[10]) }; } @DataProvider public static Object[][] charsetsAndSegments() { List values = new ArrayList<>(); for (Charset charset : standardCharsets()) { for (MemorySegment heapSegment : heapSegments()) { values.add(new Object[] { charset, heapSegment }); } } return values.toArray(Object[][]::new); } }