8302358: Behavior of adler32 changes after JDK-8300208
Reviewed-by: kvn, jbhateja
This commit is contained in:
parent
86b9fce980
commit
7c60b9c98f
@ -110,7 +110,7 @@ address StubGenerator::generate_updateBytesAdler32() {
|
||||
const XMMRegister xtmp4 = xmm9;
|
||||
const XMMRegister xtmp5 = xmm10;
|
||||
|
||||
Label SLOOP1, SPRELOOP1A_AVX2, SLOOP1A_AVX2, SLOOP1A_AVX3, AVX3_REDUCE, SKIP_LOOP_1A;
|
||||
Label SLOOP1, SLOOP1A_AVX2, SLOOP1A_AVX3, AVX3_REDUCE, SKIP_LOOP_1A;
|
||||
Label SKIP_LOOP_1A_AVX3, FINISH, LT64, DO_FINAL, FINAL_LOOP, ZERO_SIZE, END;
|
||||
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
@ -133,6 +133,7 @@ address StubGenerator::generate_updateBytesAdler32() {
|
||||
__ movdl(xa, init_d); //vmovd - 32bit
|
||||
|
||||
__ bind(SLOOP1);
|
||||
__ vpxor(yb, yb, yb, VM_Version::supports_avx512vl() ? Assembler::AVX_512bit : Assembler::AVX_256bit);
|
||||
__ movl(s, LIMIT);
|
||||
__ cmpl(s, size);
|
||||
__ cmovl(Assembler::above, s, size); // s = min(size, LIMIT)
|
||||
@ -144,10 +145,8 @@ address StubGenerator::generate_updateBytesAdler32() {
|
||||
if (VM_Version::supports_avx512vl()) {
|
||||
// AVX2 performs better for smaller inputs because of its leaner post-loop reduction sequence.
|
||||
__ cmpl(s, MAX2(128, VM_Version::avx3_threshold()));
|
||||
__ jcc(Assembler::belowEqual, SPRELOOP1A_AVX2);
|
||||
|
||||
__ jcc(Assembler::belowEqual, SLOOP1A_AVX2);
|
||||
__ lea(end, Address(s, data, Address::times_1, - (2*CHUNKSIZE -1)));
|
||||
__ vpxor(yb, yb, yb, Assembler::AVX_512bit);
|
||||
|
||||
// Some notes on vectorized main loop algorithm.
|
||||
// Additions are performed in slices of 16 bytes in the main loop.
|
||||
@ -164,8 +163,8 @@ address StubGenerator::generate_updateBytesAdler32() {
|
||||
// Since addition was performed in chunks of 16 bytes, to match the scalar implementation
|
||||
// 0th lane element must be repeatedly added 16 times, 1st element 15 times, and so on.
|
||||
// Thus we first multiply yb by 16 and then subtract the appropriately scaled ya value.
|
||||
// yb = 16 x yb - [0 - 15] x ya
|
||||
// = 64 x [0 - 15] + 48 x [16 - 31] + 32 x [32 - 47] + 16 x [48 - 63] - [0 - 15] x ya
|
||||
// yb = 16 x yb - [a0 - a15] x ya
|
||||
// = 64 x [a0 - a15] + 48 x [a16 - a31] + 32 x [a32 - a47] + 16 x [a48 - a63] - [a0 - a15] x ya
|
||||
// = 64 x a0 + 63 x a1 + 62 x a2 ...... + a63
|
||||
__ bind(SLOOP1A_AVX3);
|
||||
__ evpmovzxbd(ydata0, Address(data, 0), Assembler::AVX_512bit);
|
||||
@ -220,8 +219,6 @@ address StubGenerator::generate_updateBytesAdler32() {
|
||||
}
|
||||
|
||||
__ align32();
|
||||
__ bind(SPRELOOP1A_AVX2);
|
||||
__ vpxor(yb, yb, yb, Assembler::AVX_256bit);
|
||||
__ bind(SLOOP1A_AVX2);
|
||||
__ vbroadcastf128(ydata, Address(data, 0), Assembler::AVX_256bit);
|
||||
__ addptr(data, CHUNKSIZE);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2015, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -146,7 +146,7 @@ public class TestAdler32 {
|
||||
if (adler0.getValue() != adler1.getValue()) {
|
||||
System.err.printf("ERROR: adler0 = %08x, adler1 = %08x\n",
|
||||
adler0.getValue(), adler1.getValue());
|
||||
throw new AssertionError("TEST FAILED", null);
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -166,6 +166,7 @@ public class TestAdler32 {
|
||||
int len1 = 8; // the 8B/iteration loop
|
||||
int len2 = 32; // the 32B/iteration loop
|
||||
int len3 = 4096; // the 4KB/iteration loop
|
||||
int len4 = 5552; // the adler limit
|
||||
|
||||
byte[] b = initializedBytes(len3*16, 0);
|
||||
int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
|
||||
@ -176,6 +177,8 @@ public class TestAdler32 {
|
||||
len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
|
||||
len3, len3+1, len3+3, len3+5, len3+7,
|
||||
len3*2, len3*4, len3*8,
|
||||
len4, len4+1, len4+3, len4+5, len4+7, len4+len1, len4+len2, len4+len3,
|
||||
len4*2, len4*4, len4*2+1, len4*4+4,
|
||||
len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
|
||||
len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
|
||||
len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
|
||||
@ -214,8 +217,9 @@ public class TestAdler32 {
|
||||
for (i = 0; i < offsets.length; i++) {
|
||||
for (j = 0; j < sizes.length; j++) {
|
||||
if (!check(adler0[i*sizes.length + j], adler1[i*sizes.length + j])) {
|
||||
System.out.printf("offsets[%d] = %d", i, offsets[i]);
|
||||
System.out.printf("\tsizes[%d] = %d\n", j, sizes[j]);
|
||||
System.out.printf("Failed at: offsets[%d] = %d", i, offsets[i]);
|
||||
System.out.printf(", sizes[%d] = %d\n", j, sizes[j]);
|
||||
throw new AssertionError("TEST FAILED", null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user