From 75bea280b9adb6dac9fefafbb3f4b212f100fbb5 Mon Sep 17 00:00:00 2001 From: Ferenc Rakoczi Date: Fri, 21 Jun 2024 14:16:23 +0000 Subject: [PATCH] 8333867: SHA3 performance can be improved Reviewed-by: kvn, valeriep --- src/hotspot/share/opto/library_call.cpp | 4 +- .../sun/security/provider/DigestBase.java | 4 +- .../classes/sun/security/provider/SHA3.java | 105 ++++++++---------- 3 files changed, 51 insertions(+), 62 deletions(-) diff --git a/src/hotspot/share/opto/library_call.cpp b/src/hotspot/share/opto/library_call.cpp index b3253a817a4..596e637652d 100644 --- a/src/hotspot/share/opto/library_call.cpp +++ b/src/hotspot/share/opto/library_call.cpp @@ -7681,7 +7681,7 @@ bool LibraryCallKit::inline_digestBase_implCompress(vmIntrinsics::ID id) { break; case vmIntrinsics::_sha3_implCompress: assert(UseSHA3Intrinsics, "need SHA3 instruction support"); - state = get_state_from_digest_object(digestBase_obj, T_BYTE); + state = get_state_from_digest_object(digestBase_obj, T_LONG); stubAddr = StubRoutines::sha3_implCompress(); stubName = "sha3_implCompress"; block_size = get_block_size_from_digest_object(digestBase_obj); @@ -7781,7 +7781,7 @@ bool LibraryCallKit::inline_digestBase_implCompressMB(int predicate) { klass_digestBase_name = "sun/security/provider/SHA3"; stub_name = "sha3_implCompressMB"; stub_addr = StubRoutines::sha3_implCompressMB(); - elem_type = T_BYTE; + elem_type = T_LONG; } break; default: diff --git a/src/java.base/share/classes/sun/security/provider/DigestBase.java b/src/java.base/share/classes/sun/security/provider/DigestBase.java index dbe59396ac0..2aaf0a2fac6 100644 --- a/src/java.base/share/classes/sun/security/provider/DigestBase.java +++ b/src/java.base/share/classes/sun/security/provider/DigestBase.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -61,7 +61,7 @@ abstract class DigestBase extends MessageDigestSpi implements Cloneable { private final int digestLength; // size of the input to the compression function in bytes - private final int blockSize; + protected final int blockSize; // buffer to store partial blocks, blockSize bytes large // Subclasses should not access this array directly except possibly in their // implDigest() method. See MD5.java as an example. diff --git a/src/java.base/share/classes/sun/security/provider/SHA3.java b/src/java.base/share/classes/sun/security/provider/SHA3.java index 2b8bf8afbed..eaccf2a88e9 100644 --- a/src/java.base/share/classes/sun/security/provider/SHA3.java +++ b/src/java.base/share/classes/sun/security/provider/SHA3.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2016, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,13 +25,14 @@ package sun.security.provider; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; import java.security.ProviderException; import java.util.Arrays; import java.util.Objects; import jdk.internal.vm.annotation.IntrinsicCandidate; -import static sun.security.provider.ByteArrayAccess.b2lLittle; -import static sun.security.provider.ByteArrayAccess.l2bLittle; /** * This class implements the Secure Hash Algorithm SHA-3 developed by @@ -48,7 +49,7 @@ import static sun.security.provider.ByteArrayAccess.l2bLittle; abstract class SHA3 extends DigestBase { private static final int WIDTH = 200; // in bytes, e.g. 1600 bits - private static final int DM = 5; // dimension of lanes + private static final int DM = 5; // dimension of state matrix private static final int NR = 24; // number of rounds @@ -65,8 +66,11 @@ abstract class SHA3 extends DigestBase { }; private final byte suffix; - private byte[] state = new byte[WIDTH]; - private long[] lanes = new long[DM*DM]; + private long[] state = new long[DM*DM]; + + static final VarHandle asLittleEndian + = MethodHandles.byteArrayViewVarHandle(long[].class, + ByteOrder.LITTLE_ENDIAN).withInvokeExactBehavior(); /** * Creates a new SHA-3 object. @@ -91,10 +95,12 @@ abstract class SHA3 extends DigestBase { @IntrinsicCandidate private void implCompress0(byte[] b, int ofs) { - for (int i = 0; i < buffer.length; i++) { - state[i] ^= b[ofs++]; - } - keccak(); + for (int i = 0; i < blockSize / 8; i++) { + state[i] ^= (long) asLittleEndian.get(b, ofs); + ofs += 8; + } + + keccak(); } /** @@ -102,29 +108,43 @@ abstract class SHA3 extends DigestBase { * DigestBase calls implReset() when necessary. */ void implDigest(byte[] out, int ofs) { + byte[] byteState = new byte[8]; int numOfPadding = - setPaddingBytes(suffix, buffer, (int)(bytesProcessed % buffer.length)); + setPaddingBytes(suffix, buffer, (int)(bytesProcessed % blockSize)); if (numOfPadding < 1) { throw new ProviderException("Incorrect pad size: " + numOfPadding); } implCompress(buffer, 0); - int availableBytes = buffer.length; + int availableBytes = blockSize; // i.e. buffer.length int numBytes = engineGetDigestLength(); while (numBytes > availableBytes) { - System.arraycopy(state, 0, out, ofs, availableBytes); + for (int i = 0; i < availableBytes / 8 ; i++) { + asLittleEndian.set(out, ofs, state[i]); + ofs += 8; + } numBytes -= availableBytes; - ofs += availableBytes; keccak(); } - System.arraycopy(state, 0, out, ofs, numBytes); + int numLongs = (numBytes + 7) / 8; + + for (int i = 0; i < numLongs - 1; i++) { + asLittleEndian.set(out, ofs, state[i]); + ofs += 8; + } + if (numBytes == numLongs * 8) { + asLittleEndian.set(out, ofs, state[numLongs - 1]); + } else { + asLittleEndian.set(byteState, 0, state[numLongs - 1]); + System.arraycopy(byteState, 0, + out, ofs, numBytes - (numLongs - 1) * 8); + } } /** * Resets the internal state to start a new hash. */ void implReset() { - Arrays.fill(state, (byte)0); - Arrays.fill(lanes, 0L); + Arrays.fill(state, 0L); } /** @@ -144,46 +164,19 @@ abstract class SHA3 extends DigestBase { return (in.length - len); } - /** - * Utility function for transforming the specified byte array 's' - * into array of lanes 'm' as defined in section 3.1.2. - */ - private static void bytes2Lanes(byte[] s, long[] m) { - int sOfs = 0; - // Conversion traverses along x-axis before y-axis - for (int y = 0; y < DM; y++, sOfs += 40) { - b2lLittle(s, sOfs, m, DM*y, 40); - } - } - - /** - * Utility function for transforming the specified array of - * lanes 'm' into a byte array 's' as defined in section 3.1.3. - */ - private static void lanes2Bytes(long[] m, byte[] s) { - int sOfs = 0; - // Conversion traverses along x-axis before y-axis - for (int y = 0; y < DM; y++, sOfs += 40) { - l2bLittle(m, DM*y, s, sOfs, 40); - } - } - /** * The function Keccak as defined in section 5.2 with * rate r = 1600 and capacity c. */ private void keccak() { - // convert the 200-byte state into 25 lanes - bytes2Lanes(state, lanes); - long a0, a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12; long a13, a14, a15, a16, a17, a18, a19, a20, a21, a22, a23, a24; // move data into local variables - a0 = lanes[0]; a1 = lanes[1]; a2 = lanes[2]; a3 = lanes[3]; a4 = lanes[4]; - a5 = lanes[5]; a6 = lanes[6]; a7 = lanes[7]; a8 = lanes[8]; a9 = lanes[9]; - a10 = lanes[10]; a11 = lanes[11]; a12 = lanes[12]; a13 = lanes[13]; a14 = lanes[14]; - a15 = lanes[15]; a16 = lanes[16]; a17 = lanes[17]; a18 = lanes[18]; a19 = lanes[19]; - a20 = lanes[20]; a21 = lanes[21]; a22 = lanes[22]; a23 = lanes[23]; a24 = lanes[24]; + a0 = state[0]; a1 = state[1]; a2 = state[2]; a3 = state[3]; a4 = state[4]; + a5 = state[5]; a6 = state[6]; a7 = state[7]; a8 = state[8]; a9 = state[9]; + a10 = state[10]; a11 = state[11]; a12 = state[12]; a13 = state[13]; a14 = state[14]; + a15 = state[15]; a16 = state[16]; a17 = state[17]; a18 = state[18]; a19 = state[19]; + a20 = state[20]; a21 = state[21]; a22 = state[22]; a23 = state[23]; a24 = state[24]; // process the lanes through step mappings for (int ir = 0; ir < NR; ir++) { @@ -287,20 +280,16 @@ abstract class SHA3 extends DigestBase { a0 ^= RC_CONSTANTS[ir]; } - lanes[0] = a0; lanes[1] = a1; lanes[2] = a2; lanes[3] = a3; lanes[4] = a4; - lanes[5] = a5; lanes[6] = a6; lanes[7] = a7; lanes[8] = a8; lanes[9] = a9; - lanes[10] = a10; lanes[11] = a11; lanes[12] = a12; lanes[13] = a13; lanes[14] = a14; - lanes[15] = a15; lanes[16] = a16; lanes[17] = a17; lanes[18] = a18; lanes[19] = a19; - lanes[20] = a20; lanes[21] = a21; lanes[22] = a22; lanes[23] = a23; lanes[24] = a24; - - // convert the resulting 25 lanes back into 200-byte state - lanes2Bytes(lanes, state); + state[0] = a0; state[1] = a1; state[2] = a2; state[3] = a3; state[4] = a4; + state[5] = a5; state[6] = a6; state[7] = a7; state[8] = a8; state[9] = a9; + state[10] = a10; state[11] = a11; state[12] = a12; state[13] = a13; state[14] = a14; + state[15] = a15; state[16] = a16; state[17] = a17; state[18] = a18; state[19] = a19; + state[20] = a20; state[21] = a21; state[22] = a22; state[23] = a23; state[24] = a24; } public Object clone() throws CloneNotSupportedException { SHA3 copy = (SHA3) super.clone(); copy.state = copy.state.clone(); - copy.lanes = new long[DM*DM]; return copy; }