8157495: SHA-3 Hash algorithm performance improvements (~12x speedup)

Various improvements to performance and memory footprint

Reviewed-by: ascarpino
This commit is contained in:
Valerie Peng 2016-06-10 22:39:40 +00:00
parent 2f658b12e2
commit 18e69df0e8

View File

@ -61,14 +61,14 @@ abstract class SHA3 extends DigestBase {
0x8000000000008080L, 0x80000001L, 0x8000000080008008L, 0x8000000000008080L, 0x80000001L, 0x8000000080008008L,
}; };
private byte[] state; private byte[] state = new byte[WIDTH];
private final long[] lanes = new long[DM*DM];
/** /**
* Creates a new SHA-3 object. * Creates a new SHA-3 object.
*/ */
SHA3(String name, int digestLength) { SHA3(String name, int digestLength) {
super(name, digestLength, (WIDTH - (2 * digestLength))); super(name, digestLength, (WIDTH - (2 * digestLength)));
implReset();
} }
/** /**
@ -79,7 +79,7 @@ abstract class SHA3 extends DigestBase {
for (int i = 0; i < buffer.length; i++) { for (int i = 0; i < buffer.length; i++) {
state[i] ^= b[ofs++]; state[i] ^= b[ofs++];
} }
state = keccak(state); keccak();
} }
/** /**
@ -95,7 +95,7 @@ abstract class SHA3 extends DigestBase {
for (int i = 0; i < buffer.length; i++) { for (int i = 0; i < buffer.length; i++) {
state[i] ^= buffer[i]; state[i] ^= buffer[i];
} }
state = keccak(state); keccak();
System.arraycopy(state, 0, out, ofs, engineGetDigestLength()); System.arraycopy(state, 0, out, ofs, engineGetDigestLength());
} }
@ -103,15 +103,8 @@ abstract class SHA3 extends DigestBase {
* Resets the internal state to start a new hash. * Resets the internal state to start a new hash.
*/ */
void implReset() { void implReset() {
state = new byte[WIDTH]; Arrays.fill(state, (byte)0);
} Arrays.fill(lanes, 0L);
/**
* Utility function for circularly shifting the specified long
* value to the left by n bits.
*/
private static long circularShiftLeft(long lane, int n) {
return ((lane << n) | (lane >>> (64 - n)));
} }
/** /**
@ -132,115 +125,119 @@ abstract class SHA3 extends DigestBase {
} }
/** /**
* Utility function for transforming the specified state from * Utility function for transforming the specified byte array 's'
* the byte array format into an array of lanes as defined in * into an array of lanes 'm' as defined in section 3.1.2.
* section 3.1.2.
*/ */
private static long[][] bytes2Lanes(byte[] s) { private static void bytes2Lanes(byte[] s, long[] m) {
if (s.length != WIDTH) {
throw new ProviderException("Error: incorrect input size " +
s.length);
}
// The conversion traverses along x-axis before y-axis. So, y is the
// first dimension and x is the second dimension.
long[][] s2 = new long[DM][DM];
int sOfs = 0; int sOfs = 0;
// Conversion traverses along x-axis before y-axis
for (int y = 0; y < DM; y++, sOfs += 40) { for (int y = 0; y < DM; y++, sOfs += 40) {
b2lLittle(s, sOfs, s2[y], 0, 40); b2lLittle(s, sOfs, m, DM*y, 40);
} }
return s2;
} }
/** /**
* Utility function for transforming the specified arrays of * Utility function for transforming the specified array of
* lanes into a byte array as defined in section 3.1.3. * lanes 'm' into a byte array 's' as defined in section 3.1.3.
*/ */
private static byte[] lanes2Bytes(long[][] m) { private static void lanes2Bytes(long[] m, byte[] s) {
byte[] s = new byte[WIDTH];
int sOfs = 0; int sOfs = 0;
// The conversion traverses along x-axis before y-axis. So, y is the // Conversion traverses along x-axis before y-axis
// first dimension and x is the second dimension.
for (int y = 0; y < DM; y++, sOfs += 40) { for (int y = 0; y < DM; y++, sOfs += 40) {
l2bLittle(m[y], 0, s, sOfs, 40); l2bLittle(m, DM*y, s, sOfs, 40);
} }
return s;
} }
/** /**
* Step mapping Theta as defined in section 3.2.1. * Step mapping Theta as defined in section 3.2.1.
*/ */
private static long[][] smTheta(long[][] a) { private static long[] smTheta(long[] a) {
long[] c = new long[DM]; long c0 = a[0]^a[5]^a[10]^a[15]^a[20];
for (int i = 0; i < DM; i++) { long c1 = a[1]^a[6]^a[11]^a[16]^a[21];
c[i] = a[0][i]^a[1][i]^a[2][i]^a[3][i]^a[4][i]; long c2 = a[2]^a[7]^a[12]^a[17]^a[22];
} long c3 = a[3]^a[8]^a[13]^a[18]^a[23];
long[] d = new long[DM]; long c4 = a[4]^a[9]^a[14]^a[19]^a[24];
for (int i = 0; i < DM; i++) { long d0 = c4 ^ Long.rotateLeft(c1, 1);
long c1 = c[(i + 4) % DM]; long d1 = c0 ^ Long.rotateLeft(c2, 1);
// left shift and wrap the leftmost bit into the rightmost bit long d2 = c1 ^ Long.rotateLeft(c3, 1);
long c2 = circularShiftLeft(c[(i + 1) % DM], 1); long d3 = c2 ^ Long.rotateLeft(c4, 1);
d[i] = c1^c2; long d4 = c3 ^ Long.rotateLeft(c0, 1);
} for (int y = 0; y < a.length; y += DM) {
for (int y = 0; y < DM; y++) { a[y] ^= d0;
for (int x = 0; x < DM; x++) { a[y+1] ^= d1;
a[y][x] ^= d[x]; a[y+2] ^= d2;
} a[y+3] ^= d3;
a[y+4] ^= d4;
} }
return a; return a;
} }
/** /**
* Step mapping Rho as defined in section 3.2.2. * Merged Step mapping Rho (section 3.2.2) and Pi (section 3.2.3)
* for performance. Optimization is achieved by precalculating
* shift constants for the following loop
* int xNext, yNext;
* for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) {
* int numberOfShift = ((t + 1)*(t + 2)/2) % 64;
* a[y][x] = Long.rotateLeft(a[y][x], numberOfShift);
* xNext = y;
* yNext = (2 * x + 3 * y) % DM;
* }
* and with in-place permutation.
*/ */
private static long[][] smRho(long[][] a) { private static long[] smPiRho(long[] a) {
long[][] a2 = new long[DM][DM]; long tmp = Long.rotateLeft(a[10], 3);
a2[0][0] = a[0][0]; a[10] = Long.rotateLeft(a[1], 1);
int xNext, yNext; a[1] = Long.rotateLeft(a[6], 44);
for (int t = 0, x = 1, y = 0; t <= 23; t++, x = xNext, y = yNext) { a[6] = Long.rotateLeft(a[9], 20);
int numberOfShift = ((t + 1)*(t + 2)/2) % 64; a[9] = Long.rotateLeft(a[22], 61);
a2[y][x] = circularShiftLeft(a[y][x], numberOfShift); a[22] = Long.rotateLeft(a[14], 39);
xNext = y; a[14] = Long.rotateLeft(a[20], 18);
yNext = (2 * x + 3 * y) % DM; a[20] = Long.rotateLeft(a[2], 62);
} a[2] = Long.rotateLeft(a[12], 43);
return a2; a[12] = Long.rotateLeft(a[13], 25);
} a[13] = Long.rotateLeft(a[19], 8);
a[19] = Long.rotateLeft(a[23], 56);
/** a[23] = Long.rotateLeft(a[15], 41);
* Step mapping Pi as defined in section 3.2.3. a[15] = Long.rotateLeft(a[4], 27);
*/ a[4] = Long.rotateLeft(a[24], 14);
private static long[][] smPi(long[][] a) { a[24] = Long.rotateLeft(a[21], 2);
long[][] a2 = new long[DM][DM]; a[21] = Long.rotateLeft(a[8], 55);
for (int y = 0; y < DM; y++) { a[8] = Long.rotateLeft(a[16], 45);
for (int x = 0; x < DM; x++) { a[16] = Long.rotateLeft(a[5], 36);
a2[y][x] = a[x][(x + 3 * y) % DM]; a[5] = Long.rotateLeft(a[3], 28);
} a[3] = Long.rotateLeft(a[18], 21);
} a[18] = Long.rotateLeft(a[17], 15);
return a2; a[17] = Long.rotateLeft(a[11], 10);
a[11] = Long.rotateLeft(a[7], 6);
a[7] = tmp;
return a;
} }
/** /**
* Step mapping Chi as defined in section 3.2.4. * Step mapping Chi as defined in section 3.2.4.
*/ */
private static long[][] smChi(long[][] a) { private static long[] smChi(long[] a) {
long[][] a2 = new long[DM][DM]; for (int y = 0; y < a.length; y+=DM) {
for (int y = 0; y < DM; y++) { long ay0 = a[y];
for (int x = 0; x < DM; x++) { long ay1 = a[y+1];
a2[y][x] = a[y][x] ^ long ay2 = a[y+2];
((a[y][(x + 1) % DM] ^ 0xFFFFFFFFFFFFFFFFL) & long ay3 = a[y+3];
a[y][(x + 2) % DM]); long ay4 = a[y+4];
} a[y] = ay0 ^ ((~ay1) & ay2);
a[y+1] = ay1 ^ ((~ay2) & ay3);
a[y+2] = ay2 ^ ((~ay3) & ay4);
a[y+3] = ay3 ^ ((~ay4) & ay0);
a[y+4] = ay4 ^ ((~ay0) & ay1);
} }
return a2; return a;
} }
/** /**
* Step mapping Iota as defined in section 3.2.5. * Step mapping Iota as defined in section 3.2.5.
*
* @return the processed state array
* @param a the state array to be processed
* @param rndIndex the index into RC_CONSTANTS of the constant to apply
*/ */
private static long[][] smIota(long[][] a, int rndIndex) { private static long[] smIota(long[] a, int rndIndex) {
a[0][0] ^= RC_CONSTANTS[rndIndex]; a[0] ^= RC_CONSTANTS[rndIndex];
return a; return a;
} }
@ -248,12 +245,15 @@ abstract class SHA3 extends DigestBase {
* The function Keccak as defined in section 5.2 with * The function Keccak as defined in section 5.2 with
* width b = 1600 and capacity c = (digest length x 2). * width b = 1600 and capacity c = (digest length x 2).
*/ */
private static byte[] keccak(byte[] state) { private void keccak() {
long[][] lanes = bytes2Lanes(state); // convert the 200-byte state into 25 lanes
bytes2Lanes(state, lanes);
// process the lanes through step mappings
for (int ir = 0; ir < NR; ir++) { for (int ir = 0; ir < NR; ir++) {
lanes = smIota(smChi(smPi(smRho(smTheta(lanes)))), ir); smIota(smChi(smPiRho(smTheta(lanes))), ir);
} }
return lanes2Bytes(lanes); // convert the resulting 25 lanes back into 200-byte state
lanes2Bytes(lanes, state);
} }
public Object clone() throws CloneNotSupportedException { public Object clone() throws CloneNotSupportedException {