8316645: RISC-V: Remove dependency on libatomic by adding cmpxchg 1b

Reviewed-by: ihse, fyang, luhenry, mli
Robbin Ehn 2023-10-01 16:32:05 +00:00
parent 009f5e1fa1
commit fb055e7e53
3 changed files with 118 additions and 6 deletions


@@ -108,12 +108,6 @@ AC_DEFUN([LIB_SETUP_JVM_LIBS],
BASIC_JVM_LIBS_$1="$BASIC_JVM_LIBS_$1 -latomic"
fi
fi
# Because RISC-V only has word-sized atomics, it requires libatomic where
# other common architectures do not, so link libatomic by default.
if test "x$OPENJDK_$1_OS" = xlinux && test "x$OPENJDK_$1_CPU" = xriscv64; then
BASIC_JVM_LIBS_$1="$BASIC_JVM_LIBS_$1 -latomic"
fi
])
################################################################################
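Background for the removed block above: before this change, older RISC-V toolchains could not expand 1- and 2-byte atomic builtins inline and emitted calls into libatomic instead, so the JVM linked it unconditionally on linux-riscv64. A hypothetical stand-alone probe (not part of this commit) shows the dependency; compiled for riscv64 with a GCC older than the 13.2 gate introduced below, the builtin lowers to a call to __atomic_compare_exchange_1 from libatomic, while clang and newer GCC expand it to an LR.W/SC.W masking loop:

#include <cstdint>

bool cas_byte(volatile uint8_t* p, uint8_t expected, uint8_t desired) {
  // 1-byte CAS builtin; sub-word sizes are what older RISC-V compilers
  // outline into libatomic instead of expanding inline.
  return __atomic_compare_exchange_n(p, &expected, desired,
                                     /*weak=*/false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}

With HotSpot now providing its own byte-sized cmpxchg (see the header changes below), the JVM no longer emits such calls and the configure default can be dropped.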


@@ -33,10 +33,23 @@
// Note that memory_order_conservative requires a full barrier after atomic stores.
// See https://patchwork.kernel.org/patch/3575821/
#if defined(__clang_major__)
#define FULL_COMPILER_ATOMIC_SUPPORT
#elif (__GNUC__ > 13) || ((__GNUC__ == 13) && (__GNUC_MINOR__ >= 2))
#define FULL_COMPILER_ATOMIC_SUPPORT
#endif
template<size_t byte_size>
struct Atomic::PlatformAdd {
template<typename D, typename I>
D add_then_fetch(D volatile* dest, I add_value, atomic_memory_order order) const {
#ifndef FULL_COMPILER_ATOMIC_SUPPORT
// If add-and-fetch is ever needed for sub-word types with an older compiler,
// it must be implemented here, since we no longer link against libatomic.
STATIC_ASSERT(byte_size >= 4);
#endif
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;
}
@@ -55,12 +68,65 @@ struct Atomic::PlatformAdd {
}
};
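The hunk above elides the body that performs the word-sized update. As a rough sketch only (an assumption, not the actual HotSpot code), the pattern that the STATIC_ASSERT protects is, for 4- and 8-byte operands, a compiler builtin bracketed by full fences for the conservative memory order; __sync_synchronize() stands in for FULL_MEM_BARRIER here:

#include <cstdint>

inline int32_t add_then_fetch_sketch(volatile int32_t* dest, int32_t add_value) {
  __sync_synchronize();   // leading fence, as in the hunk above
  int32_t res = __atomic_add_fetch(dest, add_value, __ATOMIC_RELAXED);
  __sync_synchronize();   // trailing fence for conservative ordering
  return res;
}

Sub-word sizes are rejected at compile time because, without libatomic, a 1- or 2-byte add would need the same word-masking treatment that the byte-sized cmpxchg below receives.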
#ifndef FULL_COMPILER_ATOMIC_SUPPORT
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T volatile* dest __attribute__((unused)),
T compare_value,
T exchange_value,
atomic_memory_order order) const {
STATIC_ASSERT(1 == sizeof(T));
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;
}
uint32_t volatile* aligned_dst = (uint32_t volatile*)(((uintptr_t)dest) & (~((uintptr_t)0x3)));
int shift = 8 * (((uintptr_t)dest) - ((uintptr_t)aligned_dst)); // 0, 8, 16, 24
uint64_t mask = 0xfful << shift; // 0x00000000..FF..
uint64_t remask = ~mask; // 0xFFFFFFFF..00..
uint64_t w_cv = ((uint64_t)(unsigned char)compare_value) << shift; // widen to 64-bit 0x00000000..CC..
uint64_t w_ev = ((uint64_t)(unsigned char)exchange_value) << shift; // widen to 64-bit 0x00000000..EE..
uint64_t old_value;
uint64_t rc_temp;
__asm__ __volatile__ (
"1: lr.w %0, %2 \n\t"
" and %1, %0, %5 \n\t" // ignore unrelated bytes and widen to 64-bit 0x00000000..XX..
" bne %1, %3, 2f \n\t" // compare 64-bit w_cv
" and %1, %0, %6 \n\t" // remove old byte
" or %1, %1, %4 \n\t" // add new byte
" sc.w %1, %1, %2 \n\t" // store new word
" bnez %1, 1b \n\t"
"2: \n\t"
: /*%0*/"=&r" (old_value), /*%1*/"=&r" (rc_temp), /*%2*/"+A" (*aligned_dst)
: /*%3*/"r" (w_cv), /*%4*/"r" (w_ev), /*%5*/"r" (mask), /*%6*/"r" (remask)
: "memory" );
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;
}
return (T)((old_value & mask) >> shift);
}
#endif
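The assembly above performs a byte-sized CAS on the aligned 32-bit word containing the target byte: LR.W loads the word, the target byte is isolated and compared against the widened compare value, the new byte is spliced in, and SC.W attempts the store, retrying until it succeeds or the comparison fails. A portable rendering of the same mask-and-shift technique using compiler builtins (an illustrative sketch, not the code HotSpot uses; ordering fences omitted) could look like this:

#include <cstdint>

inline uint8_t cmpxchg_1b_sketch(volatile uint8_t* dest,
                                 uint8_t compare_value,
                                 uint8_t exchange_value) {
  volatile uint32_t* aligned = (volatile uint32_t*)((uintptr_t)dest & ~(uintptr_t)0x3);
  const int      shift = 8 * ((uintptr_t)dest & 0x3);        // byte position: 0, 8, 16 or 24
  const uint32_t mask  = 0xffu << shift;                     // selects the target byte
  const uint32_t cmp   = (uint32_t)compare_value  << shift;  // widened compare value
  const uint32_t ex    = (uint32_t)exchange_value << shift;  // widened exchange value

  uint32_t old_word = *aligned;
  for (;;) {
    if ((old_word & mask) != cmp) {
      break;                                  // byte differs: fail, return current value
    }
    uint32_t new_word = (old_word & ~mask) | ex;
    // On failure the builtin refreshes old_word with the current contents and we retry.
    if (__atomic_compare_exchange_n(aligned, &old_word, new_word, /*weak=*/true,
                                    __ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
      break;
    }
  }
  return (uint8_t)((old_word & mask) >> shift);
}

The hand-written LR.W/SC.W loop above achieves the same effect within a single load-reserved/store-conditional sequence; the sketch is only meant to make the masking arithmetic explicit.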
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformXchg<byte_size>::operator()(T volatile* dest,
T exchange_value,
atomic_memory_order order) const {
#ifndef FULL_COMPILER_ATOMIC_SUPPORT
// If xchg is ever needed for sub-word types with an older compiler,
// it must be implemented here, since we no longer link against libatomic.
STATIC_ASSERT(byte_size >= 4);
#endif
STATIC_ASSERT(byte_size == sizeof(T));
if (order != memory_order_relaxed) {
FULL_MEM_BARRIER;
}
@@ -80,6 +146,11 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T volatile* dest __attri
T compare_value,
T exchange_value,
atomic_memory_order order) const {
#ifndef FULL_COMPILER_ATOMIC_SUPPORT
STATIC_ASSERT(byte_size >= 4);
#endif
STATIC_ASSERT(byte_size == sizeof(T));
T value = compare_value;
if (order != memory_order_relaxed) {
@@ -148,4 +219,6 @@ struct Atomic::PlatformOrderedStore<byte_size, RELEASE_X_FENCE>
void operator()(volatile T* p, T v) const { release_store(p, v); OrderAccess::fence(); }
};
#undef FULL_COMPILER_ATOMIC_SUPPORT
#endif // OS_CPU_LINUX_RISCV_ATOMIC_LINUX_RISCV_HPP


@@ -146,6 +146,51 @@ TEST(AtomicCmpxchgTest, int64) {
Support().test();
}
struct AtomicCmpxchg1ByteStressSupport {
char _default_val;
int _base;
char _array[7+32+7];
AtomicCmpxchg1ByteStressSupport() : _default_val(0xaa), _base(7), _array{} {}
void validate(char val, char val2, int index) {
for (int i = 0; i < 7; i++) {
EXPECT_EQ(_array[i], _default_val);
}
for (int i = 7; i < (7+32); i++) {
if (i == index) {
EXPECT_EQ(_array[i], val2);
} else {
EXPECT_EQ(_array[i], val);
}
}
for (int i = 7+32; i < (7+32+7); i++) {
EXPECT_EQ(_array[i], _default_val);
}
}
void test_index(int index) {
char one = 1;
Atomic::cmpxchg(&_array[index], _default_val, one);
validate(_default_val, one, index);
Atomic::cmpxchg(&_array[index], one, _default_val);
validate(_default_val, _default_val, index);
}
void test() {
memset(_array, _default_val, sizeof(_array));
for (int i = _base; i < (_base+32); i++) {
test_index(i);
}
}
};
TEST(AtomicCmpxchg1Byte, stress) {
AtomicCmpxchg1ByteStressSupport support;
support.test();
}
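The seven guard bytes on each side of the 32-byte window let the test catch any write that leaks outside the targeted byte, and walking the index across the window exercises every byte position within an aligned word. For reference, a minimal caller of the API under test (hypothetical usage sketch, assuming HotSpot's runtime/atomic.hpp is available) looks like:

#include "runtime/atomic.hpp"

static volatile char g_claimed = 0;

bool try_claim() {
  // cmpxchg returns the previous value; the claim succeeded if it was still 0.
  return Atomic::cmpxchg(&g_claimed, (char)0, (char)1) == 0;
}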
template<typename T>
struct AtomicEnumTestSupport {
volatile T _test_value;