From 7ff9c85639ace351b6d99c9916e3afe46ac9c69d Mon Sep 17 00:00:00 2001
From: Maurizio Cimadamore <mcimadamore@openjdk.org>
Date: Tue, 15 Dec 2020 13:50:40 +0000
Subject: [PATCH] 8258242: Type profile pollution occurs when memory segments
 of different kinds are used

Reviewed-by: vlivanov, redestad
---
 src/hotspot/share/classfile/vmSymbols.hpp     |   1 +
 src/hotspot/share/oops/methodData.cpp         |  16 ++
 src/hotspot/share/oops/methodData.hpp         |   1 +
 .../access/foreign/MemorySegmentProxy.java    |  18 +-
 .../misc/X-ScopedMemoryAccess.java.template   |   3 +
 .../jdk/incubator/foreign/MemoryAccess.java   |   4 +
 .../foreign/AbstractMemorySegmentImpl.java    |   2 +-
 .../foreign/LoopOverPollutedSegments.java     | 190 ++++++++++++++++++
 8 files changed, 225 insertions(+), 10 deletions(-)
 create mode 100644 test/micro/org/openjdk/bench/jdk/incubator/foreign/LoopOverPollutedSegments.java

diff --git a/src/hotspot/share/classfile/vmSymbols.hpp b/src/hotspot/share/classfile/vmSymbols.hpp
index 495c30cfa60..4fe2054390f 100644
--- a/src/hotspot/share/classfile/vmSymbols.hpp
+++ b/src/hotspot/share/classfile/vmSymbols.hpp
@@ -353,6 +353,7 @@
   /* Panama Support */                                                                                          \
   template(jdk_internal_invoke_NativeEntryPoint,                 "jdk/internal/invoke/NativeEntryPoint")           \
   template(jdk_internal_invoke_NativeEntryPoint_signature,       "Ljdk/internal/invoke/NativeEntryPoint;")         \
+  template(jdk_incubator_foreign_MemoryAccess,       "jdk/incubator/foreign/MemoryAccess")        \
                                                                                                   \
   /* Support for JVMCI */                                                                         \
   JVMCI_VM_SYMBOLS_DO(template, do_alias)                                                         \
diff --git a/src/hotspot/share/oops/methodData.cpp b/src/hotspot/share/oops/methodData.cpp
index 311acab8460..1720c4c7b54 100644
--- a/src/hotspot/share/oops/methodData.cpp
+++ b/src/hotspot/share/oops/methodData.cpp
@@ -1600,6 +1600,18 @@ bool MethodData::profile_unsafe(const methodHandle& m, int bci) {
   return false;
 }
 
+bool MethodData::profile_memory_access(const methodHandle& m, int bci) {
+  Bytecode_invoke inv(m , bci);
+  if (inv.is_invokestatic()) {
+    if (inv.klass() == vmSymbols::jdk_incubator_foreign_MemoryAccess()) {
+      if (inv.name()->starts_with("get") || inv.name()->starts_with("set")) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 int MethodData::profile_arguments_flag() {
   return TypeProfileLevel % 10;
 }
@@ -1629,6 +1641,10 @@ bool MethodData::profile_arguments_for_invoke(const methodHandle& m, int bci) {
     return true;
   }
 
+  if (profile_memory_access(m, bci)) {
+    return true;
+  }
+
   assert(profile_arguments_jsr292_only(), "inconsistent");
   return profile_jsr292(m, bci);
 }
diff --git a/src/hotspot/share/oops/methodData.hpp b/src/hotspot/share/oops/methodData.hpp
index 83c7bd6fd6d..464e783b2a4 100644
--- a/src/hotspot/share/oops/methodData.hpp
+++ b/src/hotspot/share/oops/methodData.hpp
@@ -2148,6 +2148,7 @@ private:
 
   static bool profile_jsr292(const methodHandle& m, int bci);
   static bool profile_unsafe(const methodHandle& m, int bci);
+  static bool profile_memory_access(const methodHandle& m, int bci);
   static int profile_arguments_flag();
   static bool profile_all_arguments();
   static bool profile_arguments_for_invoke(const methodHandle& m, int bci);
diff --git a/src/java.base/share/classes/jdk/internal/access/foreign/MemorySegmentProxy.java b/src/java.base/share/classes/jdk/internal/access/foreign/MemorySegmentProxy.java
index e45ad08f8fe..a00b6516a55 100644
--- a/src/java.base/share/classes/jdk/internal/access/foreign/MemorySegmentProxy.java
+++ b/src/java.base/share/classes/jdk/internal/access/foreign/MemorySegmentProxy.java
@@ -29,21 +29,21 @@ package jdk.internal.access.foreign;
 import jdk.internal.misc.ScopedMemoryAccess;
 
 /**
- * This proxy interface is required to allow instances of the {@code MemorySegment} interface (which is defined inside
+ * This abstract class is required to allow implementations of the {@code MemorySegment} interface (which is defined inside
  * an incubating module) to be accessed from the memory access var handles.
  */
-public interface MemorySegmentProxy {
+public abstract class MemorySegmentProxy {
     /**
      * Check that memory access is within spatial bounds and that access is compatible with segment access modes.
      * @throws UnsupportedOperationException if underlying segment has incompatible access modes (e.g. attempting to write
      * a read-only segment).
      * @throws IndexOutOfBoundsException if access is out-of-bounds.
      */
-    void checkAccess(long offset, long length, boolean readOnly);
-    long unsafeGetOffset();
-    Object unsafeGetBase();
-    boolean isSmall();
-    ScopedMemoryAccess.Scope scope();
+    public abstract void checkAccess(long offset, long length, boolean readOnly);
+    public abstract long unsafeGetOffset();
+    public abstract Object unsafeGetBase();
+    public abstract boolean isSmall();
+    public abstract ScopedMemoryAccess.Scope scope();
 
     /* Helper functions for offset computations. These are required so that we can avoid issuing long opcodes
      * (e.g. LMUL, LADD) when we're operating on 'small' segments (segments whose length can be expressed with an int).
@@ -51,7 +51,7 @@ public interface MemorySegmentProxy {
      * BCE when working with small segments. This workaround should be dropped when JDK-8223051 is resolved.
      */
 
-    static long addOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
+    public static long addOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
         if (segmentProxy.isSmall()) {
             // force ints for BCE
             if (op1 > Integer.MAX_VALUE || op2 > Integer.MAX_VALUE
@@ -74,7 +74,7 @@ public interface MemorySegmentProxy {
         }
     }
 
-    static long multiplyOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
+    public static long multiplyOffsets(long op1, long op2, MemorySegmentProxy segmentProxy) {
         if (segmentProxy.isSmall()) {
             if (op1 > Integer.MAX_VALUE || op2 > Integer.MAX_VALUE
                     || op1 < Integer.MIN_VALUE || op2 < Integer.MIN_VALUE) {
diff --git a/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template b/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
index 2bf948d33d6..7e6823718ae 100644
--- a/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
+++ b/src/java.base/share/classes/jdk/internal/misc/X-ScopedMemoryAccess.java.template
@@ -325,3 +325,6 @@ public class ScopedMemoryAccess {
     }
     // typed-ops here
 
+    // Note: all the accessor methods defined below take advantage of argument type profiling
+    // (see src/hotspot/share/oops/methodData.cpp) which greatly enhances performance when the same accessor
+    // method is used repeatedly with different 'base' objects.
diff --git a/src/jdk.incubator.foreign/share/classes/jdk/incubator/foreign/MemoryAccess.java b/src/jdk.incubator.foreign/share/classes/jdk/incubator/foreign/MemoryAccess.java
index 9de681274fc..3e2182b5963 100644
--- a/src/jdk.incubator.foreign/share/classes/jdk/incubator/foreign/MemoryAccess.java
+++ b/src/jdk.incubator.foreign/share/classes/jdk/incubator/foreign/MemoryAccess.java
@@ -88,6 +88,10 @@ public final class MemoryAccess {
         return MemoryHandles.varHandle(carrier, 1, elementLayout.order());
     }
 
+    // Note: all the accessor methods defined below take advantage of argument type profiling
+    // (see src/hotspot/share/oops/methodData.cpp) which greatly enhances performance when the same accessor
+    // method is used repeatedly with different segment kinds (e.g. on-heap vs. off-heap).
+
     /**
      * Reads a byte from given segment and offset.
      *
diff --git a/src/jdk.incubator.foreign/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/jdk.incubator.foreign/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
index a26428e4959..971533569cf 100644
--- a/src/jdk.incubator.foreign/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
+++ b/src/jdk.incubator.foreign/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
@@ -53,7 +53,7 @@ import java.util.function.IntFunction;
  * are defined for each memory segment kind, see {@link NativeMemorySegmentImpl}, {@link HeapMemorySegmentImpl} and
  * {@link MappedMemorySegmentImpl}.
  */
-public abstract class AbstractMemorySegmentImpl implements MemorySegment, MemorySegmentProxy {
+public abstract class AbstractMemorySegmentImpl extends MemorySegmentProxy implements MemorySegment {
 
     private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
 
diff --git a/test/micro/org/openjdk/bench/jdk/incubator/foreign/LoopOverPollutedSegments.java b/test/micro/org/openjdk/bench/jdk/incubator/foreign/LoopOverPollutedSegments.java
new file mode 100644
index 00000000000..463474ee004
--- /dev/null
+++ b/test/micro/org/openjdk/bench/jdk/incubator/foreign/LoopOverPollutedSegments.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.jdk.incubator.foreign;
+
+import jdk.incubator.foreign.MemoryAccess;
+import jdk.incubator.foreign.MemoryLayout;
+import jdk.incubator.foreign.MemorySegment;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.TearDown;
+import org.openjdk.jmh.annotations.Warmup;
+import sun.misc.Unsafe;
+
+import java.lang.invoke.VarHandle;
+import java.util.concurrent.TimeUnit;
+
+import static jdk.incubator.foreign.MemoryLayout.PathElement.sequenceElement;
+import static jdk.incubator.foreign.MemoryLayouts.JAVA_INT;
+
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(org.openjdk.jmh.annotations.Scope.Thread)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
+@Fork(value = 3, jvmArgsAppend = { "--add-modules=jdk.incubator.foreign" })
+public class LoopOverPollutedSegments {
+
+    static final int ELEM_SIZE = 1_000_000;
+    static final int CARRIER_SIZE = (int) JAVA_INT.byteSize();
+    static final int ALLOC_SIZE = ELEM_SIZE * CARRIER_SIZE;
+
+    static final Unsafe unsafe = Utils.unsafe;
+
+    MemorySegment nativeSegment, heapSegmentBytes, heapSegmentFloats;
+    byte[] arr;
+    long addr;
+
+    static final VarHandle intHandle = MemoryLayout.ofSequence(JAVA_INT).varHandle(int.class, MemoryLayout.PathElement.sequenceElement());
+
+
+    @Setup
+    public void setup() {
+        addr = unsafe.allocateMemory(ALLOC_SIZE);
+        for (int i = 0; i < ELEM_SIZE; i++) {
+            unsafe.putInt(addr + (i * 4), i);
+        }
+        arr = new byte[ALLOC_SIZE];
+        nativeSegment = MemorySegment.allocateNative(ALLOC_SIZE, 4);
+        heapSegmentBytes = MemorySegment.ofArray(new byte[ALLOC_SIZE]);
+        heapSegmentFloats = MemorySegment.ofArray(new float[ELEM_SIZE]);
+
+        for (int rep = 0 ; rep < 5 ; rep++) {
+            for (int i = 0; i < ELEM_SIZE; i++) {
+                unsafe.putInt(arr, Unsafe.ARRAY_BYTE_BASE_OFFSET + (i * 4), i);
+                MemoryAccess.setIntAtIndex(nativeSegment, i, i);
+                MemoryAccess.setFloatAtIndex(nativeSegment, i, i);
+                intHandle.set(nativeSegment, (long)i, i);
+                MemoryAccess.setIntAtIndex(heapSegmentBytes, i, i);
+                MemoryAccess.setFloatAtIndex(heapSegmentBytes, i, i);
+                intHandle.set(heapSegmentBytes, (long)i, i);
+                MemoryAccess.setIntAtIndex(heapSegmentFloats, i, i);
+                MemoryAccess.setFloatAtIndex(heapSegmentFloats, i, i);
+                intHandle.set(heapSegmentFloats, (long)i, i);
+            }
+        }
+    }
+
+    @TearDown
+    public void tearDown() {
+        nativeSegment.close();
+        heapSegmentBytes = null;
+        heapSegmentFloats = null;
+        arr = null;
+        unsafe.freeMemory(addr);
+    }
+
+    @Benchmark
+    public int native_segment_VH() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            intHandle.set(nativeSegment, (long)k, k + 1);
+            int v = (int) intHandle.get(nativeSegment, (long)k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int native_segment_static() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            MemoryAccess.setIntAtOffset(nativeSegment, k, k + 1);
+            int v = MemoryAccess.getIntAtOffset(nativeSegment, k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int heap_segment_ints_VH() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            intHandle.set(heapSegmentBytes, (long)k, k + 1);
+            int v = (int) intHandle.get(heapSegmentBytes, (long)k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int heap_segment_ints_static() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            MemoryAccess.setIntAtOffset(heapSegmentBytes, k, k + 1);
+            int v = MemoryAccess.getIntAtOffset(heapSegmentBytes, k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int heap_segment_floats_VH() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            intHandle.set(heapSegmentFloats, (long)k, k + 1);
+            int v = (int)intHandle.get(heapSegmentFloats, (long)k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int heap_segment_floats_static() {
+        int sum = 0;
+        for (int k = 0; k < ELEM_SIZE; k++) {
+            MemoryAccess.setIntAtOffset(heapSegmentFloats, k, k + 1);
+            int v = MemoryAccess.getIntAtOffset(heapSegmentFloats, k);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int heap_unsafe() {
+        int sum = 0;
+        for (int k = 0; k < ALLOC_SIZE; k += 4) {
+            unsafe.putInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET, k + 1);
+            int v = unsafe.getInt(arr, k + Unsafe.ARRAY_BYTE_BASE_OFFSET);
+            sum += v;
+        }
+        return sum;
+    }
+
+    @Benchmark
+    public int native_unsafe() {
+        int sum = 0;
+        for (int k = 0; k < ALLOC_SIZE; k += 4) {
+            unsafe.putInt(addr + k, k + 1);
+            int v = unsafe.getInt(addr + k);
+            sum += v;
+        }
+        return sum;
+    }
+}