This commit is contained in:
Jiangli Zhou 2017-08-15 18:19:18 -04:00
commit a6dcc4531f
12 changed files with 280 additions and 8 deletions

View File

@ -16102,6 +16102,16 @@ instruct array_equalsC(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
ins_pipe(pipe_class_memory);
%}
instruct has_negatives(iRegP_R1 ary1, iRegI_R2 len, iRegI_R0 result, rFlagsReg cr)
%{
match(Set result (HasNegatives ary1 len));
effect(USE_KILL ary1, USE_KILL len, KILL cr);
format %{ "has negatives byte[] $ary1,$len -> $result" %}
ins_encode %{
__ has_negatives($ary1$$Register, $len$$Register, $result$$Register);
%}
ins_pipe( pipe_slow );
%}
// fast char[] to byte[] compression
instruct string_compress(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,

View File

@ -154,8 +154,11 @@ define_pd_global(intx, InlineSmallCode, 1000);
product(intx, BlockZeroingLowLimit, 256, \
"Minimum size in bytes when block zeroing will be used") \
range(1, max_jint) \
product(bool, TraceTraps, false, "Trace all traps the signal handler")
product(bool, TraceTraps, false, "Trace all traps the signal handler")\
product(int, SoftwarePrefetchHintDistance, -1, \
"Use prfm hint with specified distance in compiled code." \
"Value -1 means off.") \
range(-1, 32760)
#endif

View File

@ -4829,6 +4829,62 @@ void MacroAssembler::string_compare(Register str1, Register str2,
BLOCK_COMMENT("} string_compare");
}
// This method checks if provided byte array contains byte with highest bit set.
void MacroAssembler::has_negatives(Register ary1, Register len, Register result) {
// Simple and most common case of aligned small array which is not at the
// end of memory page is placed here. All other cases are in stub.
Label LOOP, END, STUB, STUB_LONG, SET_RESULT, DONE;
const uint64_t UPPER_BIT_MASK=0x8080808080808080;
assert_different_registers(ary1, len, result);
cmpw(len, 0);
br(LE, SET_RESULT);
cmpw(len, 4 * wordSize);
br(GE, STUB_LONG); // size > 32 then go to stub
int shift = 64 - exact_log2(os::vm_page_size());
lsl(rscratch1, ary1, shift);
mov(rscratch2, (size_t)(4 * wordSize) << shift);
adds(rscratch2, rscratch1, rscratch2); // At end of page?
br(CS, STUB); // at the end of page then go to stub
subs(len, len, wordSize);
br(LT, END);
BIND(LOOP);
ldr(rscratch1, Address(post(ary1, wordSize)));
tst(rscratch1, UPPER_BIT_MASK);
br(NE, SET_RESULT);
subs(len, len, wordSize);
br(GE, LOOP);
cmpw(len, -wordSize);
br(EQ, SET_RESULT);
BIND(END);
ldr(result, Address(ary1));
sub(len, zr, len, LSL, 3); // LSL 3 is to get bits from bytes
lslv(result, result, len);
tst(result, UPPER_BIT_MASK);
b(SET_RESULT);
BIND(STUB);
RuntimeAddress has_neg = RuntimeAddress(StubRoutines::aarch64::has_negatives());
assert(has_neg.target() != NULL, "has_negatives stub has not been generated");
trampoline_call(has_neg);
b(DONE);
BIND(STUB_LONG);
RuntimeAddress has_neg_long = RuntimeAddress(
StubRoutines::aarch64::has_negatives_long());
assert(has_neg_long.target() != NULL, "has_negatives stub has not been generated");
trampoline_call(has_neg_long);
b(DONE);
BIND(SET_RESULT);
cset(result, NE); // set true or false
BIND(DONE);
}
// Compare Strings or char/byte arrays.
// is_string is true iff this is a string comparison.

View File

@ -1210,6 +1210,8 @@ public:
Register tmp1,
FloatRegister vtmp, FloatRegister vtmpZ, int ae);
void has_negatives(Register ary1, Register len, Register result);
void arrays_equals(Register a1, Register a2,
Register result, Register cnt1,
int elem_size, bool is_string);

View File

@ -3670,6 +3670,167 @@ class StubGenerator: public StubCodeGenerator {
__ eor(result, __ T16B, lo, t0);
}
address generate_has_negatives(address &has_negatives_long) {
StubCodeMark mark(this, "StubRoutines", "has_negatives");
const int large_loop_size = 64;
const uint64_t UPPER_BIT_MASK=0x8080808080808080;
int dcache_line = VM_Version::dcache_line_size();
Register ary1 = r1, len = r2, result = r0;
__ align(CodeEntryAlignment);
address entry = __ pc();
__ enter();
Label RET_TRUE, RET_TRUE_NO_POP, RET_FALSE, ALIGNED, LOOP16, CHECK_16, DONE,
LARGE_LOOP, POST_LOOP16, LEN_OVER_15, LEN_OVER_8, POST_LOOP16_LOAD_TAIL;
__ cmp(len, 15);
__ br(Assembler::GT, LEN_OVER_15);
// The only case when execution falls into this code is when pointer is near
// the end of memory page and we have to avoid reading next page
__ add(ary1, ary1, len);
__ subs(len, len, 8);
__ br(Assembler::GT, LEN_OVER_8);
__ ldr(rscratch2, Address(ary1, -8));
__ sub(rscratch1, zr, len, __ LSL, 3); // LSL 3 is to get bits from bytes.
__ lsrv(rscratch2, rscratch2, rscratch1);
__ tst(rscratch2, UPPER_BIT_MASK);
__ cset(result, Assembler::NE);
__ leave();
__ ret(lr);
__ bind(LEN_OVER_8);
__ ldp(rscratch1, rscratch2, Address(ary1, -16));
__ sub(len, len, 8); // no data dep., then sub can be executed while loading
__ tst(rscratch2, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE_NO_POP);
__ sub(rscratch2, zr, len, __ LSL, 3); // LSL 3 is to get bits from bytes
__ lsrv(rscratch1, rscratch1, rscratch2);
__ tst(rscratch1, UPPER_BIT_MASK);
__ cset(result, Assembler::NE);
__ leave();
__ ret(lr);
Register tmp1 = r3, tmp2 = r4, tmp3 = r5, tmp4 = r6, tmp5 = r7, tmp6 = r10;
const RegSet spilled_regs = RegSet::range(tmp1, tmp5) + tmp6;
has_negatives_long = __ pc(); // 2nd entry point
__ enter();
__ bind(LEN_OVER_15);
__ push(spilled_regs, sp);
__ andr(rscratch2, ary1, 15); // check pointer for 16-byte alignment
__ cbz(rscratch2, ALIGNED);
__ ldp(tmp6, tmp1, Address(ary1));
__ mov(tmp5, 16);
__ sub(rscratch1, tmp5, rscratch2); // amount of bytes until aligned address
__ add(ary1, ary1, rscratch1);
__ sub(len, len, rscratch1);
__ orr(tmp6, tmp6, tmp1);
__ tst(tmp6, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
__ bind(ALIGNED);
__ cmp(len, large_loop_size);
__ br(Assembler::LT, CHECK_16);
// Perform 16-byte load as early return in pre-loop to handle situation
// when initially aligned large array has negative values at starting bytes,
// so LARGE_LOOP would do 4 reads instead of 1 (in worst case), which is
// slower. Cases with negative bytes further ahead won't be affected that
// much. In fact, it'll be faster due to early loads, less instructions and
// less branches in LARGE_LOOP.
__ ldp(tmp6, tmp1, Address(__ post(ary1, 16)));
__ sub(len, len, 16);
__ orr(tmp6, tmp6, tmp1);
__ tst(tmp6, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
__ cmp(len, large_loop_size);
__ br(Assembler::LT, CHECK_16);
if (SoftwarePrefetchHintDistance >= 0
&& SoftwarePrefetchHintDistance >= dcache_line) {
// initial prefetch
__ prfm(Address(ary1, SoftwarePrefetchHintDistance - dcache_line));
}
__ bind(LARGE_LOOP);
if (SoftwarePrefetchHintDistance >= 0) {
__ prfm(Address(ary1, SoftwarePrefetchHintDistance));
}
// Issue load instructions first, since it can save few CPU/MEM cycles, also
// instead of 4 triples of "orr(...), addr(...);cbnz(...);" (for each ldp)
// better generate 7 * orr(...) + 1 andr(...) + 1 cbnz(...) which saves 3
// instructions per cycle and have less branches, but this approach disables
// early return, thus, all 64 bytes are loaded and checked every time.
__ ldp(tmp2, tmp3, Address(ary1));
__ ldp(tmp4, tmp5, Address(ary1, 16));
__ ldp(rscratch1, rscratch2, Address(ary1, 32));
__ ldp(tmp6, tmp1, Address(ary1, 48));
__ add(ary1, ary1, large_loop_size);
__ sub(len, len, large_loop_size);
__ orr(tmp2, tmp2, tmp3);
__ orr(tmp4, tmp4, tmp5);
__ orr(rscratch1, rscratch1, rscratch2);
__ orr(tmp6, tmp6, tmp1);
__ orr(tmp2, tmp2, tmp4);
__ orr(rscratch1, rscratch1, tmp6);
__ orr(tmp2, tmp2, rscratch1);
__ tst(tmp2, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
__ cmp(len, large_loop_size);
__ br(Assembler::GE, LARGE_LOOP);
__ bind(CHECK_16); // small 16-byte load pre-loop
__ cmp(len, 16);
__ br(Assembler::LT, POST_LOOP16);
__ bind(LOOP16); // small 16-byte load loop
__ ldp(tmp2, tmp3, Address(__ post(ary1, 16)));
__ sub(len, len, 16);
__ orr(tmp2, tmp2, tmp3);
__ tst(tmp2, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
__ cmp(len, 16);
__ br(Assembler::GE, LOOP16); // 16-byte load loop end
__ bind(POST_LOOP16); // 16-byte aligned, so we can read unconditionally
__ cmp(len, 8);
__ br(Assembler::LE, POST_LOOP16_LOAD_TAIL);
__ ldr(tmp3, Address(__ post(ary1, 8)));
__ sub(len, len, 8);
__ tst(tmp3, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
__ bind(POST_LOOP16_LOAD_TAIL);
__ cbz(len, RET_FALSE); // Can't shift left by 64 when len==0
__ ldr(tmp1, Address(ary1));
__ mov(tmp2, 64);
__ sub(tmp4, tmp2, len, __ LSL, 3);
__ lslv(tmp1, tmp1, tmp4);
__ tst(tmp1, UPPER_BIT_MASK);
__ br(Assembler::NE, RET_TRUE);
// Fallthrough
__ bind(RET_FALSE);
__ pop(spilled_regs, sp);
__ leave();
__ mov(result, zr);
__ ret(lr);
__ bind(RET_TRUE);
__ pop(spilled_regs, sp);
__ bind(RET_TRUE_NO_POP);
__ leave();
__ mov(result, 1);
__ ret(lr);
__ bind(DONE);
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
return entry;
}
/**
* Arguments:
*
@ -4686,6 +4847,7 @@ class StubGenerator: public StubCodeGenerator {
// }
};
// Initialization
void generate_initial() {
// Generate initial stubs and initializes the entry points
@ -4744,6 +4906,9 @@ class StubGenerator: public StubCodeGenerator {
// arraycopy stubs used by compilers
generate_arraycopy_stubs();
// has negatives stub for large arrays.
StubRoutines::aarch64::_has_negatives = generate_has_negatives(StubRoutines::aarch64::_has_negatives_long);
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}

View File

@ -44,6 +44,8 @@ address StubRoutines::aarch64::_float_sign_flip = NULL;
address StubRoutines::aarch64::_double_sign_mask = NULL;
address StubRoutines::aarch64::_double_sign_flip = NULL;
address StubRoutines::aarch64::_zero_blocks = NULL;
address StubRoutines::aarch64::_has_negatives = NULL;
address StubRoutines::aarch64::_has_negatives_long = NULL;
bool StubRoutines::aarch64::_completed = false;
/**

View File

@ -62,6 +62,9 @@ class aarch64 {
static address _double_sign_flip;
static address _zero_blocks;
static address _has_negatives;
static address _has_negatives_long;
static bool _completed;
public:
@ -120,6 +123,14 @@ class aarch64 {
return _zero_blocks;
}
static address has_negatives() {
return _has_negatives;
}
static address has_negatives_long() {
return _has_negatives_long;
}
static bool complete() {
return _completed;
}

View File

@ -137,6 +137,8 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line);
if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes))
FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line);
if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance))
FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, 3*dcache_line);
if (PrefetchCopyIntervalInBytes != -1 &&
((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) {
@ -146,6 +148,12 @@ void VM_Version::get_processor_features() {
PrefetchCopyIntervalInBytes = 32760;
}
if (SoftwarePrefetchHintDistance != -1 &&
(SoftwarePrefetchHintDistance & 7)) {
warning("SoftwarePrefetchHintDistance must be -1, or a multiple of 8");
SoftwarePrefetchHintDistance &= ~7;
}
unsigned long auxv = getauxval(AT_HWCAP);
char buf[512];

View File

@ -2201,6 +2201,8 @@ void os::pd_print_cpu_info(outputStream* st, char* buf, size_t buflen) {
#if defined(AMD64) || defined(IA32) || defined(X32)
const char* search_string = "model name";
#elif defined(M68K)
const char* search_string = "CPU";
#elif defined(PPC64)
const char* search_string = "cpu";
#elif defined(S390)

View File

@ -4462,6 +4462,16 @@ jint Arguments::apply_ergo() {
set_shared_spaces_flags();
#if defined(SPARC)
// BIS instructions require 'membar' instruction regardless of the number
// of CPUs because in virtualized/container environments which might use only 1
// CPU, BIS instructions may produce incorrect results.
if (FLAG_IS_DEFAULT(AssumeMP)) {
FLAG_SET_DEFAULT(AssumeMP, true);
}
#endif
// Check the GC selections again.
if (!check_gc_consistency()) {
return JNI_EINVAL;

View File

@ -43,9 +43,16 @@ public class ClassPathJimageEntry extends PathHandler.PathEntry {
return Arrays.stream(reader.getEntryNames())
.filter(name -> name.endsWith(".class"))
.filter(name -> !name.endsWith("module-info.class"))
.map(ClassPathJimageEntry::toFileName)
.map(Utils::fileNameToClassName);
}
private static String toFileName(String name) {
final char nameSeparator = '/';
assert name.charAt(0) == nameSeparator : name;
return name.substring(name.indexOf(nameSeparator, 1) + 1);
}
@Override
protected String description() {
return "# jimage: " + root;

View File

@ -203,18 +203,14 @@ public class Utils {
* Converts the filename to classname.
*
* @param filename filename to convert
* @return corresponding classname.
* @return corresponding classname
* @throws AssertionError if filename isn't valid filename for class file -
* {@link #isClassFile(String)}
*/
public static String fileNameToClassName(String filename) {
assert isClassFile(filename);
// workaround for the class naming in jimage : /<module>/<class_name>
final char nameSeparator = '/';
int nameStart = filename.charAt(0) == nameSeparator
? filename.indexOf(nameSeparator, 1) + 1
: 0;
return filename.substring(nameStart, filename.length() - CLASSFILE_EXT.length())
return filename.substring(0, filename.length() - CLASSFILE_EXT.length())
.replace(nameSeparator, '.');
}