8296507: GCM using more memory than necessary with in-place operations

Reviewed-by: jnimeh
This commit is contained in:
Anthony Scarpino 2022-12-06 21:37:12 +00:00
parent cd2182a996
commit b4da0ee706

View File

@ -576,36 +576,58 @@ abstract class GaloisCounterMode extends CipherSpi {
return j0; return j0;
} }
// Wrapper function around AES-GCM interleaved intrinsic that splits /**
// large chunks of data into 1MB sized chunks. This is to place * Wrapper function around Combined AES-GCM intrinsic method that splits
// an upper limit on the number of blocks encrypted in the intrinsic. * large chunks of data into 1MB sized chunks. This is to place
* an upper limit on the number of blocks encrypted in the intrinsic.
*
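* The combined intrinsic is not used when decrypting in-place heap
* ByteBuffers or byte arrays, because 'ct' would be the same as 'in' and
* be overwritten by GCTR before GHASH has processed it for the tag. For
* that case the caller passes 'ct' as null, and GHASH is run over the
* input before GCTR writes the output.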
*/
private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct, private static int implGCMCrypt(byte[] in, int inOfs, int inLen, byte[] ct,
int ctOfs, byte[] out, int outOfs, int ctOfs, byte[] out, int outOfs,
GCTR gctr, GHASH ghash) { GCTR gctr, GHASH ghash) {
int len = 0; int len = 0;
if (inLen > SPLIT_LEN) { // Loop if input length is greater than the SPLIT_LEN
if (inLen > SPLIT_LEN && ct != null) {
int partlen;
while (inLen >= SPLIT_LEN) { while (inLen >= SPLIT_LEN) {
int partlen = implGCMCrypt0(in, inOfs + len, SPLIT_LEN, ct, partlen = implGCMCrypt0(in, inOfs + len, SPLIT_LEN, ct,
ctOfs + len, out, outOfs + len, gctr, ghash); ctOfs + len, out, outOfs + len, gctr, ghash);
len += partlen; len += partlen;
inLen -= partlen; inLen -= partlen;
} }
} }
// Finish any remaining data
if (inLen > 0) { if (inLen > 0) {
len += implGCMCrypt0(in, inOfs + len, inLen, ct, if (ct == null) {
ctOfs + len, out, outOfs + len, gctr, ghash); ghash.update(in, inOfs + len, inLen);
len += gctr.update(in, inOfs + len, inLen, out, outOfs);
} else {
len += implGCMCrypt0(in, inOfs + len, inLen, ct,
ctOfs + len, out, outOfs + len, gctr, ghash);
}
} }
return len; return len;
} }
/** /**
* Intrinsic for Vector AES Galois Counter Mode implementation. * Intrinsic for the combined AES Galois Counter Mode implementation.
* AES and GHASH operations are interleaved in the intrinsic implementation. * AES and GHASH operations are combined in the intrinsic implementation.
* return - number of processed bytes
* *
* Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic. * Requires 768 bytes (48 AES blocks) to efficiently use the intrinsic.
* inLen that is less than 768 size block sizes, before or after this * inLen that is less than 768 size block sizes, before or after this
* intrinsic is used, will be done by the calling method * intrinsic is used, will be done by the calling method
*
* Note:
* Only Intel processors with AVX512 that support vaes, vpclmulqdq,
* avx512dq, and avx512vl trigger this intrinsic.
* Other processors will always use GHASH and GCTR, which may have their own
* intrinsic support.
*
* @param in input buffer * @param in input buffer
* @param inOfs input offset * @param inOfs input offset
* @param inLen input length * @param inLen input length
@ -614,7 +636,7 @@ abstract class GaloisCounterMode extends CipherSpi {
* @param out output buffer * @param out output buffer
* @param outOfs output offset * @param outOfs output offset
* @param gctr object for the GCTR operation * @param gctr object for the GCTR operation
* @param ghash object for the ghash operation * @param ghash object for the GHASH operation
* @return number of processed bytes * @return number of processed bytes
*/ */
@IntrinsicCandidate @IntrinsicCandidate
@ -670,6 +692,11 @@ abstract class GaloisCounterMode extends CipherSpi {
byte[] originalOut = null; byte[] originalOut = null;
int originalOutOfs = 0; int originalOutOfs = 0;
// True if the operation is an in-place array decryption where the input
// offset is the same as or greater than the output offset. This is used
// to avoid the AVX512 combined intrinsic, and to avoid zeroing the input
// when the tag does not match.
boolean inPlaceArray = false;
GCMEngine(SymmetricCipher blockCipher) { GCMEngine(SymmetricCipher blockCipher) {
blockSize = blockCipher.getBlockSize(); blockSize = blockCipher.getBlockSize();
byte[] subkeyH = new byte[blockSize]; byte[] subkeyH = new byte[blockSize];
@ -736,7 +763,8 @@ abstract class GaloisCounterMode extends CipherSpi {
ByteBuffer ct = (encryption ? dst : src); ByteBuffer ct = (encryption ? dst : src);
len = GaloisCounterMode.implGCMCrypt(src.array(), len = GaloisCounterMode.implGCMCrypt(src.array(),
src.arrayOffset() + src.position(), srcLen, src.arrayOffset() + src.position(), srcLen,
ct.array(), ct.arrayOffset() + ct.position(), inPlaceArray ? null : ct.array(),
ct.arrayOffset() + ct.position(),
dst.array(), dst.arrayOffset() + dst.position(), dst.array(), dst.arrayOffset() + dst.position(),
gctr, ghash); gctr, ghash);
src.position(src.position() + len); src.position(src.position() + len);
@ -948,10 +976,13 @@ abstract class GaloisCounterMode extends CipherSpi {
// from the passed object. That gives up the true offset from // from the passed object. That gives up the true offset from
// the base address. As long as the src side is >= the dst // the base address. As long as the src side is >= the dst
// side, we are not in overlap. // side, we are not in overlap.
// NOTE: inPlaceArray does not apply here as direct buffers run
// through a byte[] to get to the combined intrinsic
if (((DirectBuffer) src).address() - srcaddr + src.position() >= if (((DirectBuffer) src).address() - srcaddr + src.position() >=
((DirectBuffer) dst).address() - dstaddr + dst.position()) { ((DirectBuffer) dst).address() - dstaddr + dst.position()) {
return dst; return dst;
} }
} else if (!src.isDirect() && !dst.isDirect()) { } else if (!src.isDirect() && !dst.isDirect()) {
// if src is read only, then we need a copy // if src is read only, then we need a copy
if (!src.isReadOnly()) { if (!src.isReadOnly()) {
@ -964,10 +995,12 @@ abstract class GaloisCounterMode extends CipherSpi {
// from the underlying byte[] address. // from the underlying byte[] address.
// If during encryption and the input offset is behind or // If during encryption and the input offset is behind or
// the same as the output offset, the same buffer can be // the same as the output offset, the same buffer can be
// used. But during decryption always create a new // used.
// buffer in case of a bad auth tag. // Set 'inPlaceArray' true for decryption operations to
if (encryption && src.position() + src.arrayOffset() >= // avoid the AVX512 combined intrinsic
if (src.position() + src.arrayOffset() >=
dst.position() + dst.arrayOffset()) { dst.position() + dst.arrayOffset()) {
inPlaceArray = (!encryption);
return dst; return dst;
} }
} }
@ -989,7 +1022,7 @@ abstract class GaloisCounterMode extends CipherSpi {
} }
/** /**
* This is used for both overlap detection for the data or decryption * This is used for both overlap detection for the data or decryption
* during in-place crypto, so to not overwrite the input if the auth tag * during in-place crypto, so to not overwrite the input if the auth tag
* is invalid. * is invalid.
* *
@ -997,10 +1030,13 @@ abstract class GaloisCounterMode extends CipherSpi {
* allocated because for code simplicity. * allocated because for code simplicity.
*/ */
byte[] overlapDetection(byte[] in, int inOfs, byte[] out, int outOfs) { byte[] overlapDetection(byte[] in, int inOfs, byte[] out, int outOfs) {
if (in == out && (!encryption || inOfs < outOfs)) { if (in == out) {
originalOut = out; if (inOfs < outOfs) {
originalOutOfs = outOfs; originalOut = out;
return new byte[out.length]; originalOutOfs = outOfs;
return new byte[out.length];
}
inPlaceArray = (!encryption);
} }
return out; return out;
} }
@ -1501,8 +1537,11 @@ abstract class GaloisCounterMode extends CipherSpi {
} }
if (mismatch != 0) { if (mismatch != 0) {
// Clear output data // If this is an in-place array, don't zero the input
Arrays.fill(out, outOfs, outOfs + len, (byte) 0); if (!inPlaceArray) {
// Clear output data
Arrays.fill(out, outOfs, outOfs + len, (byte) 0);
}
throw new AEADBadTagException("Tag mismatch"); throw new AEADBadTagException("Tag mismatch");
} }
@ -1586,16 +1625,21 @@ abstract class GaloisCounterMode extends CipherSpi {
if (mismatch != 0) { if (mismatch != 0) {
// Clear output data // Clear output data
dst.reset(); dst.reset();
if (dst.hasArray()) { // If this is an in-place array, don't zero the src
int ofs = dst.arrayOffset() + dst.position(); if (!inPlaceArray) {
Arrays.fill(dst.array(), ofs , ofs + len, (byte)0); if (dst.hasArray()) {
} else { int ofs = dst.arrayOffset() + dst.position();
NIO_ACCESS.acquireSession(dst); Arrays.fill(dst.array(), ofs, ofs + len,
try { (byte) 0);
Unsafe.getUnsafe().setMemory(((DirectBuffer)dst).address(), } else {
NIO_ACCESS.acquireSession(dst);
try {
Unsafe.getUnsafe().setMemory(
((DirectBuffer)dst).address(),
len + dst.position(), (byte) 0); len + dst.position(), (byte) 0);
} finally { } finally {
NIO_ACCESS.releaseSession(dst); NIO_ACCESS.releaseSession(dst);
}
} }
} }
throw new AEADBadTagException("Tag mismatch"); throw new AEADBadTagException("Tag mismatch");
@ -1807,8 +1851,11 @@ abstract class GaloisCounterMode extends CipherSpi {
int outOfs) { int outOfs) {
int len = 0; int len = 0;
if (inLen >= PARALLEL_LEN) { if (inLen >= PARALLEL_LEN) {
len += implGCMCrypt(in, inOfs, inLen, in, inOfs, out, outOfs, // Since GCMDecrypt.inPlaceArray cannot be accessed, check that
gctr, ghash); // 'in' and 'out' are the same. All other in-place situations
// have been resolved by overlapDetection()
len += implGCMCrypt(in, inOfs, inLen, (in == out ? null : in),
inOfs, out, outOfs, gctr, ghash);
} }
ghash.doFinal(in, inOfs + len, inLen - len); ghash.doFinal(in, inOfs + len, inLen - len);
return len + gctr.doFinal(in, inOfs + len, inLen - len, out, return len + gctr.doFinal(in, inOfs + len, inLen - len, out,