3789983e89
Reviewed-by: darcy, ihse
161 lines
6.5 KiB
C++
161 lines
6.5 KiB
C++
/*
|
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* This code is free software; you can redistribute it and/or modify it
|
|
* under the terms of the GNU General Public License version 2 only, as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
* version 2 for more details (a copy is included in the LICENSE file that
|
|
* accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU General Public License version
|
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
*
|
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
* or visit www.oracle.com if you need additional information or have any
|
|
* questions.
|
|
*
|
|
*/
|
|
|
|
#include "precompiled.hpp"
|
|
|
|
#include "gc/shared/memset_with_concurrent_readers.hpp"
|
|
#include "runtime/prefetch.inline.hpp"
|
|
#include "utilities/align.hpp"
|
|
#include "utilities/debug.hpp"
|
|
#include "utilities/globalDefinitions.hpp"
|
|
#include "utilities/macros.hpp"
|
|
|
|
#if INCLUDE_ALL_GCS
|
|
|
|
// An implementation of memset, for use when there may be concurrent
// readers of the region being stored into.
//
// We can't use the standard library memset if it is implemented using
// block initializing stores.  Doing so can result in concurrent readers
// seeing spurious zeros.
//
// We can't use the obvious C/C++ for-loop, because the compiler may
// recognize the idiomatic loop and optimize it into a call to the
// standard library memset; we've seen exactly this happen with, for
// example, Solaris Studio 12.3.  Hence the use of inline assembly
// code, hiding loops from the compiler's optimizer.
//
// We don't attempt to use the standard library memset when it is safe
// to do so.  We could conservatively do so by detecting the presence
// of block initializing stores (VM_Version::has_blk_init()), but the
// implementation provided here should be sufficient.
|
|
|
|
// Fill the byte range [start, end) with value, one byte store per byte.
//
// start - first byte to write.
// end   - one past the last byte to write.
// value - the value stored into each byte (via stb, a byte store).
//
// Precondition (asserted): (end - start) < BytesPerWord.  BytesPerWord is
// statically required to be 8, so at most 7 bytes are ever written, which
// is exactly the number of stb instructions in the store block below.
//
// Instead of a loop, the assembly computes a jump into the middle of a
// fixed block of 7 byte stores, landing (end - start) stores before the
// end of the block.  Keeping this in inline assembly hides the fill from
// the compiler's optimizer, so it cannot be rewritten into a call to the
// standard library memset.
//
// The instruction counts are load-bearing: the constant 40 below assumes
// exactly 3 instructions (rd, jmpl, nop) followed by 7 stores, 4 bytes
// each.  Do not insert, remove, or reorder instructions without
// recomputing it.
inline void fill_subword(void* start, void* end, int value) {
  STATIC_ASSERT(BytesPerWord == 8);
  assert(pointer_delta(end, start, 1) < (size_t)BytesPerWord, "precondition");
  // Dispatch on (end - start).
  void* pc;
  __asm__ volatile(
    // offset := (7 - (end - start)) + 3
    //   3 instructions from rdpc to DISPATCH
    " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
    " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
    " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
    " rd %%pc, %[pc]\n\t"   // dispatch on scaled offset
    " jmpl %[pc]+%[offset], %%g0\n\t"
    " nop\n\t"
    // DISPATCH: no direct reference, but without it the store block may be elided.
    "1:\n\t"
    " stb %[value], [%[end]-7]\n\t" // end[-7] = value
    " stb %[value], [%[end]-6]\n\t"
    " stb %[value], [%[end]-5]\n\t"
    " stb %[value], [%[end]-4]\n\t"
    " stb %[value], [%[end]-3]\n\t"
    " stb %[value], [%[end]-2]\n\t"
    " stb %[value], [%[end]-1]\n\t" // end[-1] = value
    : /* only temporaries/overwritten outputs */
      [pc] "=&r" (pc),               // temp
      // The local copy of start is reused as the scratch "offset" register,
      // so its value is destroyed by the assembly.
      [offset] "+&r" (start)
    : [end] "r" (end),
      [value] "r" (value)
    // The stores go through registers the compiler cannot track as memory
    // operands, hence the "memory" clobber.
    : "memory");
}
|
|
|
|
void memset_with_concurrent_readers(void* to, int value, size_t size) {
|
|
Prefetch::write(to, 0);
|
|
void* end = static_cast<char*>(to) + size;
|
|
if (size >= (size_t)BytesPerWord) {
|
|
// Fill any partial word prefix.
|
|
uintx* aligned_to = static_cast<uintx*>(align_up(to, BytesPerWord));
|
|
fill_subword(to, aligned_to, value);
|
|
|
|
// Compute fill word.
|
|
STATIC_ASSERT(BitsPerByte == 8);
|
|
STATIC_ASSERT(BitsPerWord == 64);
|
|
uintx xvalue = value & 0xff;
|
|
xvalue |= (xvalue << 8);
|
|
xvalue |= (xvalue << 16);
|
|
xvalue |= (xvalue << 32);
|
|
|
|
uintx* aligned_end = static_cast<uintx*>(align_down(end, BytesPerWord));
|
|
assert(aligned_to <= aligned_end, "invariant");
|
|
|
|
// for ( ; aligned_to < aligned_end; ++aligned_to) {
|
|
// *aligned_to = xvalue;
|
|
// }
|
|
uintptr_t temp;
|
|
__asm__ volatile(
|
|
// Unroll loop x8.
|
|
" sub %[aend], %[ato], %[temp]\n\t"
|
|
" cmp %[temp], 56\n\t" // cc := (aligned_end - aligned_to) > 7 words
|
|
" ba %%xcc, 2f\n\t" // goto TEST always
|
|
" sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
|
|
// LOOP:
|
|
"1:\n\t" // unrolled x8 store loop top
|
|
" cmp %[temp], %[ato]\n\t" // cc := limit > (next) aligned_to
|
|
" stx %[xvalue], [%[ato]-64]\n\t" // store 8 words, aligned_to pre-incremented
|
|
" stx %[xvalue], [%[ato]-56]\n\t"
|
|
" stx %[xvalue], [%[ato]-48]\n\t"
|
|
" stx %[xvalue], [%[ato]-40]\n\t"
|
|
" stx %[xvalue], [%[ato]-32]\n\t"
|
|
" stx %[xvalue], [%[ato]-24]\n\t"
|
|
" stx %[xvalue], [%[ato]-16]\n\t"
|
|
" stx %[xvalue], [%[ato]-8]\n\t"
|
|
// TEST:
|
|
"2:\n\t"
|
|
" bgu,a %%xcc, 1b\n\t" // goto LOOP if more than 7 words remaining
|
|
" add %[ato], 64, %[ato]\n\t" // aligned_to += 8, for next iteration
|
|
// Fill remaining < 8 full words.
|
|
// Dispatch on (aligned_end - aligned_to).
|
|
// offset := (7 - (aligned_end - aligned_to)) + 3
|
|
// 3 instructions from rdpc to DISPATCH
|
|
" sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
|
|
" srax %[ato], 1, %[ato]\n\t" // scale offset for instruction size of 4
|
|
" add %[ato], 40, %[ato]\n\t" // offset += 10 * instruction size
|
|
" rd %%pc, %[temp]\n\t" // dispatch on scaled offset
|
|
" jmpl %[temp]+%[ato], %%g0\n\t"
|
|
" nop\n\t"
|
|
// DISPATCH: no direct reference, but without it the store block may be elided.
|
|
"3:\n\t"
|
|
" stx %[xvalue], [%[aend]-56]\n\t" // aligned_end[-7] = xvalue
|
|
" stx %[xvalue], [%[aend]-48]\n\t"
|
|
" stx %[xvalue], [%[aend]-40]\n\t"
|
|
" stx %[xvalue], [%[aend]-32]\n\t"
|
|
" stx %[xvalue], [%[aend]-24]\n\t"
|
|
" stx %[xvalue], [%[aend]-16]\n\t"
|
|
" stx %[xvalue], [%[aend]-8]\n\t" // aligned_end[-1] = xvalue
|
|
: /* only temporaries/overwritten outputs */
|
|
[temp] "=&r" (temp),
|
|
[ato] "+&r" (aligned_to)
|
|
: [aend] "r" (aligned_end),
|
|
[xvalue] "r" (xvalue)
|
|
: "cc", "memory");
|
|
to = aligned_end; // setup for suffix
|
|
}
|
|
// Fill any partial word suffix. Also the prefix if size < BytesPerWord.
|
|
fill_subword(to, end, value);
|
|
}
|
|
|
|
#endif // INCLUDE_ALL_GCS
|