8292758: put support for UNSIGNED5 format into its own header file
Reviewed-by: dlong, coleenp
This commit is contained in:
parent
6677227301
commit
8d3399bf5f
@ -27,36 +27,45 @@
|
||||
#include "utilities/ostream.hpp"
|
||||
#include "utilities/moveBits.hpp"
|
||||
|
||||
// 32-bit self-inverse encoding of float bits
|
||||
// converts trailing zeroes (common in floats) to leading zeroes
|
||||
inline juint CompressedStream::reverse_int(juint i) {
|
||||
return reverse_bits(i);
|
||||
}
|
||||
|
||||
jint CompressedReadStream::read_signed_int() {
|
||||
return decode_sign(read_int());
|
||||
return UNSIGNED5::decode_sign(read_int());
|
||||
}
|
||||
|
||||
// Compressing floats is simple, because the only common pattern
|
||||
// is trailing zeroes. (Compare leading sign bits on ints.)
|
||||
// Since floats are left-justified, as opposed to right-justified
|
||||
// ints, we can bit-reverse them in order to take advantage of int
|
||||
// compression.
|
||||
|
||||
// compression. Since bit reversal converts trailing zeroes to
|
||||
// leading zeroes, the effect is better compression of those common
|
||||
// 32-bit float values, such as integers or integers divided by
|
||||
// powers of two, that have many trailing zeroes.
|
||||
jfloat CompressedReadStream::read_float() {
|
||||
int rf = read_int();
|
||||
int f = reverse_int(rf);
|
||||
int f = reverse_bits(rf);
|
||||
return jfloat_cast(f);
|
||||
}
|
||||
|
||||
// The treatment of doubles is similar. We could bit-reverse each
|
||||
// entire 64-bit word, but it is almost as effective to bit-reverse
|
||||
// the individual halves. Since we are going to encode them
|
||||
// separately as 32-bit halves anyway, it seems slightly simpler
|
||||
// to reverse after splitting, and when reading reverse each
|
||||
// half before joining them together.
|
||||
jdouble CompressedReadStream::read_double() {
|
||||
jint rh = read_int();
|
||||
jint rl = read_int();
|
||||
jint h = reverse_int(rh);
|
||||
jint l = reverse_int(rl);
|
||||
jint h = reverse_bits(rh);
|
||||
jint l = reverse_bits(rl);
|
||||
return jdouble_cast(jlong_from(h, l));
|
||||
}
|
||||
|
||||
// A 64-bit long is encoded into distinct 32-bit halves. This saves
|
||||
// us from having to define a 64-bit encoding and is almost as
|
||||
// effective. A modified LEB128 could encode longs into 9 bytes, and
|
||||
// this technique maxes out at 10 bytes, so, if we didn't mind the
|
||||
// extra complexity of another coding system, we could process 64-bit
|
||||
// values as single units. But, the complexity does not seem
|
||||
// worthwhile.
|
||||
jlong CompressedReadStream::read_long() {
|
||||
jint low = read_signed_int();
|
||||
jint high = read_signed_int();
|
||||
@ -70,26 +79,31 @@ CompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStrea
|
||||
}
|
||||
|
||||
void CompressedWriteStream::grow() {
|
||||
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
|
||||
int nsize = _size * 2;
|
||||
const int min_expansion = UNSIGNED5::MAX_LENGTH;
|
||||
if (nsize < min_expansion*2) {
|
||||
nsize = min_expansion*2;
|
||||
}
|
||||
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, nsize);
|
||||
memcpy(_new_buffer, _buffer, _position);
|
||||
_buffer = _new_buffer;
|
||||
_size = _size * 2;
|
||||
_size = nsize;
|
||||
}
|
||||
|
||||
void CompressedWriteStream::write_float(jfloat value) {
|
||||
juint f = jint_cast(value);
|
||||
juint rf = reverse_int(f);
|
||||
assert(f == reverse_int(rf), "can re-read same bits");
|
||||
juint rf = reverse_bits(f);
|
||||
assert(f == reverse_bits(rf), "can re-read same bits");
|
||||
write_int(rf);
|
||||
}
|
||||
|
||||
void CompressedWriteStream::write_double(jdouble value) {
|
||||
juint h = high(jlong_cast(value));
|
||||
juint l = low( jlong_cast(value));
|
||||
juint rh = reverse_int(h);
|
||||
juint rl = reverse_int(l);
|
||||
assert(h == reverse_int(rh), "can re-read same bits");
|
||||
assert(l == reverse_int(rl), "can re-read same bits");
|
||||
juint rh = reverse_bits(h);
|
||||
juint rl = reverse_bits(l);
|
||||
assert(h == reverse_bits(rh), "can re-read same bits");
|
||||
assert(l == reverse_bits(rl), "can re-read same bits");
|
||||
write_int(rh);
|
||||
write_int(rl);
|
||||
}
|
||||
|
@ -26,6 +26,7 @@
|
||||
#define SHARE_CODE_COMPRESSEDSTREAM_HPP
|
||||
|
||||
#include "memory/allocation.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
|
||||
// Simple interface for filing out and filing in basic types
|
||||
// Used for writing out and reading in debugging information.
|
||||
@ -36,18 +37,6 @@ class CompressedStream : public ResourceObj {
|
||||
u_char* _buffer;
|
||||
int _position;
|
||||
|
||||
enum {
|
||||
// Constants for UNSIGNED5 coding of Pack200
|
||||
lg_H = 6, H = 1<<lg_H, // number of high codes (64)
|
||||
L = (1<<BitsPerByte)-H, // number of low codes (192)
|
||||
MAX_i = 4 // bytes are numbered in (0..4), max 5 bytes
|
||||
};
|
||||
|
||||
// 32-bit one-to-one sign encoding taken from Pack200
|
||||
// converts leading sign bits into leading zeroes with trailing sign bit
|
||||
static juint encode_sign(jint value) { return (value << 1) ^ (value >> 31); }
|
||||
static jint decode_sign(juint value) { return (value >> 1) ^ -(jint)(value & 1); }
|
||||
static juint reverse_int(juint i); // to trim trailing float 0's
|
||||
public:
|
||||
CompressedStream(u_char* buffer, int position = 0) {
|
||||
_buffer = buffer;
|
||||
@ -66,41 +55,6 @@ class CompressedReadStream : public CompressedStream {
|
||||
private:
|
||||
inline u_char read() { return _buffer[_position++]; }
|
||||
|
||||
// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
|
||||
// It assumes that most values have lots of leading zeroes.
|
||||
// Very small values, in the range [0..191], code in one byte.
|
||||
// Any 32-bit value (including negatives) can be coded, in
|
||||
// up to five bytes. The grammar is:
|
||||
// low_byte = [0..191]
|
||||
// high_byte = [192..255]
|
||||
// any_byte = low_byte | high_byte
|
||||
// coding = low_byte
|
||||
// | high_byte low_byte
|
||||
// | high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte high_byte any_byte
|
||||
// Each high_byte contributes six bits of payload.
|
||||
// The encoding is one-to-one (except for integer overflow)
|
||||
// and easy to parse and unparse.
|
||||
|
||||
jint read_int_mb(jint b0) {
|
||||
int pos = position() - 1;
|
||||
u_char* buf = buffer() + pos;
|
||||
assert(buf[0] == b0 && b0 >= L, "correctly called");
|
||||
jint sum = b0;
|
||||
// must collect more bytes: b[1]...b[4]
|
||||
int lg_H_i = lg_H;
|
||||
for (int i = 0; ; ) {
|
||||
jint b_i = buf[++i]; // b_i = read(); ++i;
|
||||
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
|
||||
if (b_i < L || i == MAX_i) {
|
||||
set_position(pos+i+1);
|
||||
return sum;
|
||||
}
|
||||
lg_H_i += lg_H;
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
CompressedReadStream(u_char* buffer, int position = 0)
|
||||
: CompressedStream(buffer, position) {}
|
||||
@ -109,14 +63,14 @@ class CompressedReadStream : public CompressedStream {
|
||||
jbyte read_byte() { return (jbyte ) read(); }
|
||||
jchar read_char() { return (jchar ) read_int(); }
|
||||
jshort read_short() { return (jshort ) read_signed_int(); }
|
||||
jint read_int() { jint b0 = read();
|
||||
if (b0 < L) return b0;
|
||||
else return read_int_mb(b0);
|
||||
}
|
||||
jint read_signed_int();
|
||||
jfloat read_float(); // jfloat_cast(reverse_int(read_int()))
|
||||
jdouble read_double(); // jdouble_cast(2*reverse_int(read_int))
|
||||
jfloat read_float(); // jfloat_cast(reverse_bits(read_int()))
|
||||
jdouble read_double(); // jdouble_cast(2*reverse_bits(read_int))
|
||||
jlong read_long(); // jlong_from(2*read_signed_int())
|
||||
|
||||
jint read_int() {
|
||||
return UNSIGNED5::read_uint(_buffer, _position, 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -134,23 +88,6 @@ class CompressedWriteStream : public CompressedStream {
|
||||
}
|
||||
void grow();
|
||||
|
||||
// UNSIGNED5 coding, 1-5 byte cases
|
||||
void write_int_mb(jint value) {
|
||||
juint sum = value;
|
||||
for (int i = 0; ; ) {
|
||||
if (sum < L || i == MAX_i) {
|
||||
// remainder is either a "low code" or the 5th byte
|
||||
assert(sum == (u_char)sum, "valid byte");
|
||||
write((u_char)sum);
|
||||
break;
|
||||
}
|
||||
sum -= L;
|
||||
int b_i = L + (sum % H); // this is a "high code"
|
||||
sum >>= lg_H; // extracted 6 bits
|
||||
write(b_i); ++i;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
int _size;
|
||||
|
||||
@ -163,13 +100,15 @@ class CompressedWriteStream : public CompressedStream {
|
||||
void write_byte(jbyte value) { write(value); }
|
||||
void write_char(jchar value) { write_int(value); }
|
||||
void write_short(jshort value) { write_signed_int(value); }
|
||||
void write_int(jint value) { if ((juint)value < L && !full())
|
||||
store((u_char)value);
|
||||
else write_int_mb(value); }
|
||||
void write_signed_int(jint value) { write_int(encode_sign(value)); }
|
||||
void write_float(jfloat value); // write_int(reverse_int(jint_cast(v)))
|
||||
void write_double(jdouble value); // write_int(reverse_int(<low,high>))
|
||||
void write_signed_int(jint value) { write_int(UNSIGNED5::encode_sign(value)); }
|
||||
void write_float(jfloat value); // write_int(reverse_bits(jint_cast(v)))
|
||||
void write_double(jdouble value); // write_int(reverse_bits(<low,high>))
|
||||
void write_long(jlong value); // write_signed_int(<low,high>)
|
||||
|
||||
void write_int(juint value) {
|
||||
UNSIGNED5::write_uint_grow(value, _buffer, _position, _size,
|
||||
[&](int){ grow(); });
|
||||
}
|
||||
};
|
||||
|
||||
#endif // SHARE_CODE_COMPRESSEDSTREAM_HPP
|
||||
|
@ -61,6 +61,7 @@
|
||||
#include "utilities/formatBuffer.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
#include "utilities/vmError.hpp"
|
||||
|
||||
#include <stdio.h>
|
||||
@ -648,6 +649,37 @@ extern "C" JNIEXPORT void findbcp(intptr_t method, intptr_t bcp) {
|
||||
}
|
||||
}
|
||||
|
||||
// check and decode a single u5 value
|
||||
extern "C" JNIEXPORT u4 u5decode(intptr_t addr) {
  // Debugger helper: addr is reinterpreted as a pointer to encoded bytes.
  Command c("u5decode");
  u1* bytes = (u1*)addr;
  size_t pos = 0;
  size_t end = 5;  // examine at most five bytes starting at addr
  // Validate before decoding; a failed check (check_length returning 0)
  // makes this function return 0 instead of decoding.
  const bool well_formed = UNSIGNED5::check_length(bytes, pos, end) != 0;
  if (well_formed) {
    return UNSIGNED5::read_uint(bytes, pos, end);
  }
  return 0;
}
|
||||
|
||||
// Sets up a Reader from addr/limit and prints count items.
|
||||
// A limit of zero means no set limit; stop at the first null
|
||||
// or after count items are printed.
|
||||
// A count of zero or less is converted to -1, which means
|
||||
// there is no limit on the count of items printed; the
|
||||
// printing stops when a null is printed or at limit.
|
||||
// See documentation for UNSIGNED5::Reader::print(count).
|
||||
extern "C" JNIEXPORT intptr_t u5p(intptr_t addr,
                                  intptr_t limit,
                                  int count) {
  // Debugger helper: prints a run of encoded values starting at addr and
  // returns the address just past the last byte consumed.
  Command c("u5p");
  u1* bytes = (u1*)addr;
  // A non-zero limit below addr is clamped up to addr.
  if (limit != 0 && limit < addr) {
    limit = addr;
  }
  // Convert the absolute limit into an offset; zero stays zero ("no limit").
  size_t span = 0;
  if (limit != 0) {
    span = limit - addr;
  }
  // Non-positive counts are passed on as -1.
  const int max_items = (count > 0) ? count : -1;
  size_t stop = UNSIGNED5::print_count(max_items, bytes, (size_t)0, span);
  return addr + stop;
}
|
||||
|
||||
|
||||
// int versions of all methods to avoid having to type type casts in the debugger
|
||||
|
||||
void pp(intptr_t p) { pp((void*)p); }
|
||||
|
83
src/hotspot/share/utilities/unsigned5.cpp
Normal file
83
src/hotspot/share/utilities/unsigned5.cpp
Normal file
@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
|
||||
// Most of UNSIGNED5 is in the header file.
|
||||
// Let's put a few debug functions out-of-line here.
|
||||
|
||||
// For the record, UNSIGNED5 was defined around 2001 and was first
|
||||
// published in the initial Pack200 spec. See:
|
||||
// https://docs.oracle.com/en/java/javase/11/docs/specs/pack-spec.html
|
||||
// in Section 6.1, "Encoding of Small Whole Numbers".
|
||||
|
||||
PRAGMA_DIAG_PUSH
|
||||
PRAGMA_FORMAT_NONLITERAL_IGNORED
|
||||
|
||||
// For debugging, even in product builds (see debug.cpp).
|
||||
template<typename ARR, typename OFF, typename GET>
|
||||
void UNSIGNED5::Reader<ARR,OFF,GET>::
|
||||
print_on(outputStream* st, int count,
|
||||
const char* left, // "U5: ["
|
||||
const char* right // "] (values=%d/length=%d)\n"
|
||||
) {
|
||||
if (left == NULL) left = "U5: [";
|
||||
if (right == NULL) right = "] (values=%d/length=%d)\n";
|
||||
int printed = 0;
|
||||
st->print("%s", left);
|
||||
for (;;) {
|
||||
if (count >= 0 && printed >= count) break;
|
||||
if (!has_next()) {
|
||||
if ((_limit == 0 || _position < _limit) && _array[_position] == 0) {
|
||||
st->print(" null");
|
||||
++_position; // skip null byte
|
||||
++printed;
|
||||
if (_limit != 0) continue; // keep going to explicit limit
|
||||
}
|
||||
break;
|
||||
}
|
||||
u4 value = next_uint();
|
||||
if (printed == 0)
|
||||
st->print("%d", value);
|
||||
else
|
||||
st->print(" %d", value);
|
||||
++printed;
|
||||
}
|
||||
st->print(right,
|
||||
// these arguments may or may not be used in the format string:
|
||||
printed,
|
||||
(int)_position);
|
||||
}
|
||||
|
||||
PRAGMA_DIAG_POP
|
||||
|
||||
// Explicit instantiation for supported types.
|
||||
template void UNSIGNED5::Reader<char*,int>::
|
||||
print_on(outputStream* st, int count, const char* left, const char* right);
|
||||
template void UNSIGNED5::Reader<u1*,int>::
|
||||
print_on(outputStream* st, int count, const char* left, const char* right);
|
||||
template void UNSIGNED5::Reader<address,size_t>::
|
||||
print_on(outputStream* st, int count, const char* left, const char* right);
|
419
src/hotspot/share/utilities/unsigned5.hpp
Normal file
419
src/hotspot/share/utilities/unsigned5.hpp
Normal file
@ -0,0 +1,419 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef SHARE_UTILITIES_UNSIGNED5_HPP
|
||||
#define SHARE_UTILITIES_UNSIGNED5_HPP
|
||||
|
||||
#include "memory/allStatic.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/ostream.hpp"
|
||||
|
||||
// Low-level interface for [de-]coding compressed uint32_t (u4) values.
|
||||
|
||||
// A uint32_t value (32-bit unsigned int) can be encoded very quickly into
|
||||
// one to five bytes, and decoded back again, again very quickly.
|
||||
// This is useful for storing data, like offsets or access flags, that
|
||||
// is usually simple (fits in fewer bytes usually) but sometimes has
|
||||
// to be complicated (uses all five bytes when necessary).
|
||||
|
||||
// Notable features:
|
||||
// - represents all 32-bit uint32_t values
|
||||
// - never reads or writes beyond 5 bytes
|
||||
// - values up to 0xBE (0x307E/0xC207E/0x308207E) code in 1 byte (2/3/4 bytes)
|
||||
// - longer encodings are always of larger values (length grows monotonically)
|
||||
// - encodings are little-endian numerals in a modified base-64 system
|
||||
// - "negatives" ((u4)-1) need 5 bytes (but see also UNSIGNED5::encode_sign)
|
||||
// - different encodings decode to different values (excepting overflow)
|
||||
// - zero bytes are *never* used, so it interoperates with null termination
|
||||
// - the algorithms are templates and cooperate well with your own types
|
||||
// - one writer algorithm can grow your resizable buffer on the fly
|
||||
|
||||
// The encoding, taken from J2SE Pack200, is called UNSIGNED5.
|
||||
// It expects the uint32_t values you give it will have many leading zeroes.
|
||||
//
|
||||
// More details:
|
||||
// Very small values, in the range [0..190], code in one byte.
|
||||
// Any 32-bit value (including negatives) can be coded, in
|
||||
// up to five bytes. The grammar is:
|
||||
// low_byte = [1..191]
|
||||
// high_byte = [192..255]
|
||||
// any_byte = low_byte | high_byte
|
||||
// coding = low_byte
|
||||
// | high_byte low_byte
|
||||
// | high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte high_byte any_byte
|
||||
// Each high_byte contributes six bits of payload.
|
||||
// The encoding is one-to-one (except for integer overflow)
|
||||
// and easy to parse and unparse. Longer sequences always
|
||||
// decode to larger numbers. Sequences of the same length
|
||||
// compared as little-endian numerals decode to numbers which
|
||||
// are ordered in the same sense as those numerals.
|
||||
|
||||
// Parsing (reading) consists of doing a limit test to see if the byte
|
||||
// is a low-byte or a high-byte, and also unconditionally adding the
|
||||
// digit value of the byte, multiplied by its base-64 place value, to
|
||||
// an accumulator. The accumulator is returned after either 5 bytes
|
||||
// are seen, or the first low-byte is seen. Oddly enough, this is
|
||||
// enough to create a dense var-int format, which is why it was
|
||||
// adopted for Pack200. By comparison, the more common LEB128 format
|
||||
// is less dense (for many typical workloads) and does not guarantee a
|
||||
// length limit.
|
||||
|
||||
class UNSIGNED5 : AllStatic {
|
||||
private:
|
||||
// Math constants for the modified UNSIGNED5 coding of Pack200
|
||||
static const int lg_H = 6; // log-base-2 of H (lg 64 == 6)
|
||||
static const int H = 1<<lg_H; // number of "high" bytes (64)
|
||||
static const int X = 1 ; // there is one excluded byte ('\0')
|
||||
static const int MAX_b = (1<<BitsPerByte)-1; // largest byte value
|
||||
static const int L = (MAX_b+1)-X-H; // number of "low" bytes (191)
|
||||
|
||||
public:
|
||||
static const int MAX_LENGTH = 5; // lengths are in [1..5]
|
||||
static const uint32_t MAX_VALUE = (uint32_t)-1; // 2^^32-1
|
||||
|
||||
// The default method for reading and writing bytes is simply
|
||||
// b=a[i] and a[i]=b, as defined by this helpful functor.
|
||||
template<typename ARR, typename OFF>
|
||||
struct ArrayGetSet {
|
||||
uint8_t operator()(ARR a, OFF i) const { return a[i]; };
|
||||
void operator()(ARR a, OFF i, uint8_t b) const { a[i] = b; };
|
||||
// So, an expression ArrayGetSet() acts like these lambdas:
|
||||
//auto get = [&](ARR a, OFF i){ return a[i]; };
|
||||
//auto set = [&](ARR a, OFF i, uint8_t x){ a[i] = x; };
|
||||
};
|
||||
|
||||
// decode a single unsigned 32-bit int from an array-like base address
|
||||
// returns the decoded value, updates offset_rw
|
||||
// that is, offset_rw is both read and written
|
||||
// warning: caller must ensure there is at least one byte available
|
||||
// the limit is either zero meaning no limit check, or an exclusive offset
|
||||
// in PRODUCT builds, limit is ignored
|
||||
template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
|
||||
static uint32_t read_uint(ARR array, OFF& offset_rw, OFF limit, GET get = GET()) {
|
||||
const OFF pos = offset_rw;
|
||||
STATIC_ASSERT(sizeof(get(array, pos)) == 1); // must be a byte-getter
|
||||
const uint32_t b_0 = (uint8_t) get(array, pos); //b_0 = a[0]
|
||||
assert(b_0 >= X, "avoid excluded bytes");
|
||||
uint32_t sum = b_0 - X;
|
||||
if (sum < L) { // common case
|
||||
offset_rw = pos + 1;
|
||||
return sum;
|
||||
}
|
||||
// must collect more bytes: b[1]...b[4]
|
||||
int lg_H_i = lg_H; // lg(H)*i == lg(H^^i)
|
||||
for (int i = 1; ; i++) { // for i in [1..4]
|
||||
assert(limit == 0 || pos + i < limit, "oob");
|
||||
const uint32_t b_i = (uint8_t) get(array, pos + i); //b_i = a[i]
|
||||
assert(b_i >= X, "avoid excluded bytes");
|
||||
sum += (b_i - X) << lg_H_i; // sum += (b[i]-X)*(64^^i)
|
||||
if (b_i < X+L || i == MAX_LENGTH-1) {
|
||||
offset_rw = pos + i + 1;
|
||||
return sum;
|
||||
}
|
||||
lg_H_i += lg_H;
|
||||
}
|
||||
}
|
||||
|
||||
// encode a single unsigned 32-bit int into an array-like span
|
||||
// offset_rw is both read and written
|
||||
// the limit is either zero meaning no limit check, or an exclusive offset
|
||||
// warning: caller must ensure there is available space
|
||||
template<typename ARR, typename OFF, typename SET = ArrayGetSet<ARR,OFF>>
|
||||
static void write_uint(uint32_t value, ARR array, OFF& offset_rw, OFF limit, SET set = SET()) {
|
||||
const OFF pos = offset_rw;
|
||||
if (value < L) {
|
||||
const uint32_t b_0 = X + value;
|
||||
assert(b_0 == (uint8_t)b_0, "valid byte");
|
||||
set(array, pos, (uint8_t)b_0); //a[0] = b_0
|
||||
offset_rw = pos + 1;
|
||||
return;
|
||||
}
|
||||
uint32_t sum = value;
|
||||
for (int i = 0; ; i++) { // for i in [0..4]
|
||||
if (sum < L || i == MAX_LENGTH-1) {
|
||||
// remainder is either a "low code" or the 5th byte
|
||||
uint32_t b_i = X + sum;
|
||||
assert(b_i == (uint8_t)b_i, "valid byte");
|
||||
set(array, pos + i, (uint8_t)b_i); //a[i] = b_i
|
||||
offset_rw = pos + i + 1;
|
||||
return;
|
||||
}
|
||||
sum -= L;
|
||||
uint32_t b_i = X + L + (sum % H); // this is a "high code"
|
||||
assert(b_i == (uint8_t)b_i, "valid byte");
|
||||
set(array, pos + i, (uint8_t)b_i); //a[i] = b_i
|
||||
sum >>= lg_H; // extracted 6 bits
|
||||
}
|
||||
}
|
||||
|
||||
// returns the encoded byte length of an unsigned 32-bit int
|
||||
static constexpr int encoded_length(uint32_t value) {
|
||||
// model the reading of [0..5] high-bytes, followed possibly by a low-byte
|
||||
// Be careful: the constexpr magic evaporates if undefined behavior
|
||||
// results from any of these expressions. Beware of signed overflow!
|
||||
uint32_t sum = 0;
|
||||
uint32_t lg_H_i = 0;
|
||||
for (uint32_t i = 0; ; i++) { // for i in [1..4]
|
||||
if (value <= sum + ((L-1) << lg_H_i) || i == MAX_LENGTH-1) {
|
||||
return i + 1; // stopping at byte i implies length is i+1
|
||||
}
|
||||
sum += (MAX_b - X) << lg_H_i;
|
||||
lg_H_i += lg_H;
|
||||
}
|
||||
}
|
||||
|
||||
// reports the largest uint32_t value that can be encoded using len bytes
|
||||
// len must be in the range [1..5]
|
||||
static constexpr uint32_t max_encoded_in_length(uint32_t len) {
|
||||
assert(len >= 1 && len <= MAX_LENGTH, "invalid length");
|
||||
if (len >= MAX_LENGTH) return MAX_VALUE; // largest non-overflow value
|
||||
// Be careful: the constexpr magic evaporates if undefined behavior
|
||||
// results from any of these expressions. Beware of signed overflow!
|
||||
uint32_t all_combinations = 0;
|
||||
uint32_t combinations_i = L; // L * H^i
|
||||
for (uint32_t i = 0; i < len; i++) {
|
||||
// count combinations of <H*L> that end at byte i
|
||||
all_combinations += combinations_i;
|
||||
combinations_i <<= lg_H;
|
||||
}
|
||||
return all_combinations - 1;
|
||||
}
|
||||
|
||||
// tells if a value, when encoded, would fit between the offset and limit
|
||||
template<typename OFF>
|
||||
static constexpr bool fits_in_limit(uint32_t value, OFF offset, OFF limit) {
|
||||
assert(limit != 0, "");
|
||||
return (offset + MAX_LENGTH <= limit ||
|
||||
offset + encoded_length(value) <= limit);
|
||||
}
|
||||
|
||||
// parses one encoded value for correctness and returns the size,
|
||||
// or else returns zero if there is a problem (bad limit or excluded byte)
|
||||
// the limit is either zero meaning no limit check, or an exclusive offset
|
||||
template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
|
||||
static int check_length(ARR array, OFF offset, OFF limit = 0,
|
||||
GET get = GET()) {
|
||||
const OFF pos = offset;
|
||||
STATIC_ASSERT(sizeof(get(array, pos)) == 1); // must be a byte-getter
|
||||
const uint32_t b_0 = (uint8_t) get(array, pos); //b_0 = a[0]
|
||||
if (b_0 < X+L) {
|
||||
return (b_0 < X) ? 0 : 1;
|
||||
}
|
||||
// parse more bytes: b[1]...b[4]
|
||||
for (int i = 1; ; i++) { // for i in [1..4]
|
||||
if (limit != 0 && pos + i >= limit) return 0; // limit failure
|
||||
const uint32_t b_i = (uint8_t) get(array, pos + i); //b_i = a[i]
|
||||
if (b_i < X) return 0; // excluded byte found
|
||||
if (b_i < X+L || i == MAX_LENGTH-1) {
|
||||
return i + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ARR, typename OFF, typename GFN,
|
||||
typename SET = ArrayGetSet<ARR,OFF>>
|
||||
static void write_uint_grow(uint32_t value,
|
||||
ARR& array, OFF& offset, OFF& limit,
|
||||
GFN grow, SET set = SET()) {
|
||||
assert(limit != 0, "limit required");
|
||||
const OFF pos = offset;
|
||||
if (!fits_in_limit(value, pos, limit)) {
|
||||
grow(MAX_LENGTH); // caller must ensure it somehow fixes array/limit span
|
||||
assert(pos + MAX_LENGTH <= limit, "should have grown");
|
||||
}
|
||||
write_uint(value, array, offset, limit, set);
|
||||
}
|
||||
|
||||
/// Handy state machines that will help you with reading,
|
||||
/// sizing, and writing (with optional growth).
|
||||
|
||||
// Reader example use:
|
||||
// struct MyReaderHelper {
|
||||
// char operator()(char* a, int i) const { return a[i]; }
|
||||
// };
|
||||
// using MyReader = UNSIGNED5::Reader<char*, int, MyReaderHelper>;
|
||||
// MyReader r(array); while (r.has_next()) print(r.next_uint());
|
||||
template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
|
||||
class Reader {
|
||||
const ARR _array;
|
||||
const OFF _limit;
|
||||
OFF _position;
|
||||
int next_length() {
|
||||
return UNSIGNED5::check_length(_array, _position, _limit, GET());
|
||||
}
|
||||
public:
|
||||
Reader(ARR array, OFF limit = 0)
|
||||
: _array(array), _limit(limit) { _position = 0; }
|
||||
uint32_t next_uint() {
|
||||
return UNSIGNED5::read_uint(_array, _position, _limit, GET());
|
||||
}
|
||||
bool has_next() {
|
||||
return next_length() != 0;
|
||||
}
|
||||
// tries to skip count logical entries; returns actual number skipped
|
||||
int try_skip(int count) {
|
||||
int actual = 0;
|
||||
while (actual < count && has_next()) {
|
||||
int len = next_length(); // 0 or length in [1..5]
|
||||
if (len == 0) break;
|
||||
_position += len;
|
||||
}
|
||||
return actual;
|
||||
}
|
||||
ARR array() { return _array; }
|
||||
OFF limit() { return _limit; }
|
||||
OFF position() { return _position; }
|
||||
void set_position(OFF position) { _position = position; }
|
||||
|
||||
// For debugging, even in product builds (see debug.cpp).
|
||||
// Checks and decodes a series of u5 values from the reader.
|
||||
// Sets position just after the last decoded byte or null byte.
|
||||
// If this reader has a limit, stop before that limit.
|
||||
// If this reader has no limit, stop after the first null byte.
|
||||
// In any case, if count is non-negative, print no more than
|
||||
// count items (uint32_t values or "null").
|
||||
// A negative count means we stop only at the limit or null,
|
||||
// kind of like strlen.
|
||||
void print(int count = -1) { print_on(tty, count); }
|
||||
|
||||
// The character strings are printed before and after the
|
||||
// series of values (which are separated only by spaces).
|
||||
// If they are null they default to something like "U5:[ "
|
||||
// and " ] (values=%d/length=%d)\n".
|
||||
// The %d formats are for the number of printed items and
|
||||
// their length in bytes, if you want to see that also.
|
||||
void print_on(outputStream* st, int count = -1,
|
||||
const char* left = NULL, const char* right = NULL);
|
||||
};
|
||||
|
||||
// Writer example use
|
||||
// struct MyWriterHelper {
|
||||
// void operator()(char* a, int i, char b) const { a[i] = b; }
|
||||
// };
|
||||
// using MyWriter = UNSIGNED5::Writer<char*, int, MyWriterHelper>;
|
||||
// MyWriter w(array);
|
||||
// for (auto i = ...) w.accept_uint(i);
|
||||
template<typename ARR, typename OFF, typename SET = ArrayGetSet<ARR,OFF>>
|
||||
class Writer {
|
||||
ARR& _array;
|
||||
OFF* const _limit_ptr;
|
||||
OFF _position;
|
||||
public:
|
||||
Writer(const ARR& array)
|
||||
: _array(const_cast<ARR&>(array)), _limit_ptr(NULL), _position(0) {
|
||||
// Note: if _limit_ptr is NULL, the ARR& is never reassigned,
|
||||
// because has_limit is false. So the const_cast here is safe.
|
||||
assert(!has_limit(), "this writer cannot be growable");
|
||||
}
|
||||
Writer(ARR& array, OFF& limit)
|
||||
: _array(array), _limit_ptr(&limit), _position(0) {
|
||||
// Writable array argument can be rewritten by accept_grow.
|
||||
// So we need a legitimate (non-zero) limit to work with.
|
||||
// As a result, a writer's initial buffer must not be empty.
|
||||
assert(this->limit() != 0, "limit required");
|
||||
}
|
||||
void accept_uint(uint32_t value) {
|
||||
const OFF lim = has_limit() ? limit() : 0;
|
||||
UNSIGNED5::write_uint(value, _array, _position, lim, SET());
|
||||
}
|
||||
template<typename GFN>
|
||||
void accept_grow(uint32_t value, GFN grow) {
|
||||
assert(has_limit(), "must track growing limit");
|
||||
UNSIGNED5::write_uint_grow(value, _array, _position, *_limit_ptr,
|
||||
grow, SET());
|
||||
}
|
||||
// Ensure that remaining() >= r, grow if needed. Suggested
|
||||
// expression for r is (n*MAX_LENGTH)+1, where n is the number of
|
||||
// values you are about to write.
|
||||
template<typename GFN>
|
||||
void ensure_remaining_grow(int request_remaining, GFN grow) {
|
||||
const OFF have = remaining();
|
||||
if (have < request_remaining) {
|
||||
grow(have - request_remaining); // caller must fix array/limit span
|
||||
assert(remaining() >= request_remaining, "should have grown");
|
||||
}
|
||||
}
|
||||
// use to add a terminating null or other data
|
||||
void end_byte(uint8_t extra_byte = 0) {
|
||||
SET()(_array, _position++, extra_byte);
|
||||
}
|
||||
ARR array() { return _array; }
|
||||
OFF position() { return _position; }
|
||||
void set_position(OFF position) { _position = position; }
|
||||
bool has_limit() { return _limit_ptr != NULL; }
|
||||
OFF limit() { assert(has_limit(), "needs limit"); return *_limit_ptr; }
|
||||
OFF remaining() { return limit() - position(); }
|
||||
};
|
||||
|
||||
// Sizer example use
|
||||
// UNSIGNED5::Sizer s;
|
||||
// for (auto i = ...) s.accept_uint(i);
|
||||
// printf("%d items occupying %d bytes", s.count(), s.position());
|
||||
// auto buf = new char[s.position() + 1];
|
||||
// UNSIGNED5::Writer<char*, int> w(buf);
|
||||
// for (auto i = ...) w.accept_uint(i);
|
||||
// assert(w.position() == s.position(), "s and w agree");
|
||||
// w.end_byte();  // terminating null; this advances w.position() by one
|
||||
template<typename OFF = int>
|
||||
class Sizer {
|
||||
OFF _position;
|
||||
int _count;
|
||||
public:
|
||||
Sizer() { _position = 0; _count = 0; }
|
||||
// The accept_uint() API is the same as for Writer, which allows
|
||||
// templated code to work equally well on sizers and writers.
|
||||
// This in turn makes it easier to write code which runs a
|
||||
// sizing preflight pass before actually storing the data.
|
||||
void accept_uint(uint32_t value) {
|
||||
_position += encoded_length(value);
|
||||
_count++;
|
||||
}
|
||||
OFF position() { return _position; }
|
||||
int count() { return _count; }
|
||||
};
|
||||
|
||||
// 32-bit one-to-one sign encoding taken from Pack200.
// Leading sign bits become leading zeroes and the sign moves to the
// lowest bit, so small negative numbers compress as well as small
// positive ones.
static uint32_t encode_sign(int32_t value) {
  const uint32_t doubled   = static_cast<uint32_t>(value) << 1;
  const int32_t  sign_fill = value >> 31;  // relies on arithmetic right shift, as before
  return doubled ^ sign_fill;
}
// Inverse of encode_sign: the low bit selects whether the remaining
// bits are complemented.
static int32_t decode_sign(uint32_t value) {
  const uint32_t magnitude = value >> 1;
  const int32_t  sign_fill = -(int32_t)(value & 1);
  return magnitude ^ sign_fill;
}
|
||||
|
||||
// Convenience wrapper around print_count with count == -1.
// Returns the value produced by print_count.
template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
static OFF print(ARR array, OFF offset = 0, OFF limit = 0,
                 GET get = GET()) {
  // The result must be returned: this function is declared to return
  // OFF, and flowing off the end of it is undefined behavior.
  return print_count(-1, array, offset, limit, get);
}
|
||||
template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
|
||||
static OFF print_count(int count,
|
||||
ARR array, OFF offset = 0, OFF limit = 0,
|
||||
GET get = GET()) {
|
||||
Reader<ARR,OFF,GET> r(array, offset);
|
||||
|
||||
r.print_on(tty, count);
|
||||
return r.position();
|
||||
}
|
||||
};
|
||||
#endif // SHARE_UTILITIES_UNSIGNED5_HPP
|
@ -24,7 +24,10 @@
|
||||
|
||||
package sun.jvm.hotspot.code;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
import sun.jvm.hotspot.debugger.*;
|
||||
import sun.jvm.hotspot.utilities.*;
|
||||
|
||||
public class CompressedReadStream extends CompressedStream {
|
||||
/** Equivalent to CompressedReadStream(buffer, 0) */
|
||||
@ -56,16 +59,6 @@ public class CompressedReadStream extends CompressedStream {
|
||||
return decodeSign(readInt());
|
||||
}
|
||||
|
||||
public int readInt() {
|
||||
int b0 = read();
|
||||
if (b0 < L) {
|
||||
return b0;
|
||||
} else {
|
||||
return readIntMb(b0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public float readFloat() {
|
||||
return Float.intBitsToFloat(reverseInt(readInt()));
|
||||
}
|
||||
@ -85,41 +78,13 @@ public class CompressedReadStream extends CompressedStream {
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
// Internals only below this point
|
||||
//
|
||||
|
||||
|
||||
// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
|
||||
// It assumes that most values have lots of leading zeroes.
|
||||
// Very small values, in the range [0..191], code in one byte.
|
||||
// Any 32-bit value (including negatives) can be coded, in
|
||||
// up to five bytes. The grammar is:
|
||||
// low_byte = [0..191]
|
||||
// high_byte = [192..255]
|
||||
// any_byte = low_byte | high_byte
|
||||
// coding = low_byte
|
||||
// | high_byte low_byte
|
||||
// | high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte low_byte
|
||||
// | high_byte high_byte high_byte high_byte any_byte
|
||||
// Each high_byte contributes six bits of payload.
|
||||
// The encoding is one-to-one (except for integer overflow)
|
||||
// and easy to parse and unparse.
|
||||
|
||||
private int readIntMb(int b0) {
|
||||
int pos = position - 1;
|
||||
int sum = b0;
|
||||
// must collect more bytes: b[1]...b[4]
|
||||
int lg_H_i = lg_H;
|
||||
for (int i = 0; ;) {
|
||||
int b_i = read(pos + (++i));
|
||||
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
|
||||
if (b_i < L || i == MAX_i) {
|
||||
setPosition(pos+i+1);
|
||||
return sum;
|
||||
}
|
||||
lg_H_i += lg_H;
|
||||
}
|
||||
public int readInt() {
|
||||
// UNSIGNED5::read_uint(_buffer, &_position, limit=0)
|
||||
return (int) Unsigned5.readUint(this, position,
|
||||
// bytes are fetched here:
|
||||
CompressedReadStream::read,
|
||||
// updated position comes through here:
|
||||
CompressedReadStream::setPosition);
|
||||
}
|
||||
|
||||
private short read(int index) {
|
||||
@ -132,4 +97,22 @@ public class CompressedReadStream extends CompressedStream {
|
||||
++position;
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Dumps the stream, making an assumption that all items are encoded
|
||||
* as UNSIGNED5. The sizeLimit argument tells the dumper when to
|
||||
* stop trying to read bytes; if it is zero, the dumper goes as long
|
||||
* as it can until it encounters a null byte.
|
||||
*
|
||||
* This class mixes UNSIGNED5 with other formats. Stray bytes are
|
||||
* decoded either as "null" (0x00), one less than the byte value
|
||||
* (0x01..0xBF) or as part of a spurious multi-byte encoding.
|
||||
* Proceed with caution.
|
||||
*/
|
||||
public void dump() { dumpOn(System.out, 0); }
|
||||
public void dump(int sizeLimit) { dumpOn(System.out, sizeLimit); }
|
||||
public void dumpOn(PrintStream tty, int sizeLimit) {
|
||||
new Unsigned5(buffer, sizeLimit).dumpOn(tty, -1);
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
package sun.jvm.hotspot.code;
|
||||
|
||||
import sun.jvm.hotspot.debugger.*;
|
||||
import sun.jvm.hotspot.utilities.*;
|
||||
|
||||
/** NOTE that this class takes the address of a buffer. This means
|
||||
that it can read previously-generated debug information directly
|
||||
@ -54,12 +55,6 @@ public class CompressedStream {
|
||||
public static final int LogBitsPerByte = 3;
|
||||
public static final int BitsPerByte = 1 << 3;
|
||||
|
||||
// Constants for UNSIGNED5 coding of Pack200
|
||||
public static final int lg_H = 6;
|
||||
public static final int H = 1<<lg_H; // number of high codes (64)
|
||||
public static final int L = (1<<BitsPerByte) - H; // number of low codes (192)
|
||||
public static final int MAX_i = 4; // bytes are numbered in (0..4)
|
||||
|
||||
// Positioning
|
||||
public int getPosition() {
|
||||
return position;
|
||||
@ -68,24 +63,17 @@ public class CompressedStream {
|
||||
this.position = position;
|
||||
}
|
||||
|
||||
// 32-bit one-to-one sign encoding taken from Pack200
|
||||
// converts leading sign bits into leading zeros with trailing sign bit
|
||||
public int encodeSign(int value) {
|
||||
return (value << 1) ^ (value >> 31);
|
||||
return Unsigned5.encodeSign(value);
|
||||
}
|
||||
|
||||
public int decodeSign(int value) {
|
||||
return (value >>> 1) ^ -(value & 1);
|
||||
return Unsigned5.decodeSign(value);
|
||||
}
|
||||
|
||||
// 32-bit self-inverse encoding of float bits
|
||||
// converts trailing zeros (common in floats) to leading zeros
|
||||
public int reverseInt(int i) {
|
||||
// Hacker's Delight, Figure 7-1
|
||||
i = (i & 0x55555555) << 1 | (i >>> 1) & 0x55555555;
|
||||
i = (i & 0x33333333) << 3 | (i >>> 2) & 0x33333333;
|
||||
i = (i & 0x0f0f0f0f) << 4 | (i >>> 4) & 0x0f0f0f0f;
|
||||
i = (i << 24) | ((i & 0xff00) << 8) | ((i >>> 8) & 0xff00) | (i >>> 24);
|
||||
return i;
|
||||
return Integer.reverse(i);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,211 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*
|
||||
*/
|
||||
|
||||
package sun.jvm.hotspot.utilities;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
||||
import sun.jvm.hotspot.debugger.*;
|
||||
|
||||
/**
|
||||
* Decompression algorithm from utilities/unsigned5.hpp.
|
||||
*/
|
||||
public class Unsigned5 {
|
||||
public static final int LogBitsPerByte = 3;
|
||||
public static final int BitsPerByte = 1 << 3;
|
||||
|
||||
// Constants for UNSIGNED5 coding of Pack200
|
||||
private static final int lg_H = 6; // log-base-2 of H (lg 64 == 6)
|
||||
private static final int H = 1<<lg_H; // number of "high" bytes (64)
|
||||
private static final int X = 1; // there is one excluded byte ('\0')
|
||||
private static final int MAX_b = (1<<BitsPerByte)-1; // largest byte value
|
||||
private static final int L = (MAX_b+1)-X-H; // number of "low" bytes (191)
|
||||
public static final int MAX_LENGTH = 5; // lengths are in [1..5]
|
||||
|
||||
// Note: Previous versions of HotSpot used X=0 (not 1) and L=192 (not 191)
|
||||
//
|
||||
// Using this SA code on old versions of HotSpot, or older SA code
|
||||
// on newer versions of HotSpot, will decode compressed data
|
||||
// wrongly. One might consider using vmStructs to communicate this
|
||||
// particular change between the SA and VM, but it is mostly futile.
|
||||
// There are a myriad of new changes in any version of HotSpot. You
|
||||
// have to use the right SA and VM versions together.
|
||||
|
||||
public interface GetByte<ARR> {
|
||||
short getByte(ARR array, int position);
|
||||
}
|
||||
public interface SetPosition<ARR> {
|
||||
void setPosition(ARR array, int position);
|
||||
}
|
||||
|
||||
// UNSIGNED5::read_uint(_buffer, &_position, limit=0)
|
||||
// In C++ this is a generic algorithm, templated with "holes"
|
||||
// for array (ARR), offset (OFF), and fetch behavior (GET).
|
||||
// In addition, the position is updated by reference.
|
||||
// Let us mimic these conditions with two lambdas, both
|
||||
// on the ARR parameter. We will hardwire the position
|
||||
// type (OFF) to int (sorry, not long), and omit the extra
|
||||
// limit feature.
|
||||
public static
|
||||
<ARR> long readUint(ARR base, int position,
|
||||
GetByte<ARR> getByte,
|
||||
SetPosition<ARR> setPosition) {
|
||||
int pos = position;
|
||||
int b_0 = getByte.getByte(base, pos);
|
||||
int sum = b_0 - X;
|
||||
// VM throws assert if b0<X; we just return -1 here instead
|
||||
if (sum < L) { // common case
|
||||
setPosition.setPosition(base, pos+1);
|
||||
return Integer.toUnsignedLong(sum);
|
||||
}
|
||||
// must collect more bytes: b[1]...b[4]
|
||||
int lg_H_i = lg_H; // lg(H)*i == lg(H^^i)
|
||||
for (int i = 1; ; i++) { // for i in [1..4]
|
||||
int b_i = getByte.getByte(base, pos + i);
|
||||
if (b_i < X) { // avoid excluded bytes
|
||||
// VM throws assert here; should not happen
|
||||
setPosition.setPosition(base, pos+i); // do not consume the bad byte
|
||||
return Integer.toUnsignedLong(sum); // return whatever we have parsed so far
|
||||
}
|
||||
sum += (b_i - X) << lg_H_i; // sum += (b[i]-X)*(64^^i)
|
||||
if (b_i < X+L || i == MAX_LENGTH-1) {
|
||||
setPosition.setPosition(base, pos+i+1);
|
||||
return Integer.toUnsignedLong(sum);
|
||||
}
|
||||
lg_H_i += lg_H;
|
||||
}
|
||||
}
|
||||
|
||||
// 32-bit one-to-one sign encoding taken from Pack200
|
||||
// converts leading sign bits into leading zeros with trailing sign bit
|
||||
// uint32_t encode_sign(int32_t value)
|
||||
public static int encodeSign(int value) {
|
||||
return (value << 1) ^ (value >> 31);
|
||||
}
|
||||
|
||||
// int32_t decode_sign(uint32_t value)
|
||||
public static int decodeSign(int value) {
|
||||
return (value >>> 1) ^ -(value & 1);
|
||||
}
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
// constructor and instance methods for convenience
|
||||
|
||||
// You can read and print a stream directly from memory if you like.
|
||||
// First wrap these up, then call read or print.
|
||||
private final Address base;
|
||||
private final int limit;
|
||||
|
||||
// There is no C++ instance of UNSIGNED5 but it seems useful to
|
||||
// allow this class to serve as a holder for an address and optional
|
||||
// limit, to point at a place where U5 encodings might be stored.
|
||||
// Compare with Unsigned5::Reader(ARR array, OFF limit = 0).
|
||||
// Wraps an address with no limit (limit == 0 means unlimited; proceed
// with caution).
public Unsigned5(Address base) {
  this(base, 0);
}

// Wraps an address together with an optional limit.
public Unsigned5(Address base, int limit) {
  this.limit = limit;
  this.base = base;
}

public Address base() {
  return base;
}

// Fetches the one-byte value stored at offset `pos` from the base address.
public short getByte(int pos) {
  return (short) base.getCIntegerAt(pos, 1, true);
}
|
||||
|
||||
// An UNSIGNED5::Reader gadget has a settable, auto-incremented
|
||||
// position field and can read through a stream of encoded values.
|
||||
// Java can model this as an inner class: var r = myU5.new Reader()
|
||||
// or var r = new Unsigned5(myaddr).new Reader()
|
||||
public class Reader {
|
||||
private int position = 0; // this is for Unsigned5::Reader behavior
|
||||
public int position() { return position; }
|
||||
public void setPosition(int pos) { position = pos; }
|
||||
// UNSIGNED5::Reader::next_uint
|
||||
public long nextUint() {
|
||||
if (!hasNext()) return -1;
|
||||
return readUint(this, position, Reader::getByte, Reader::setPosition);
|
||||
}
|
||||
// UNSIGNED5::Reader::has_next
|
||||
public boolean hasNext() { return Unsigned5.this.hasNext(position); }
|
||||
// delegate reads to outer object:
|
||||
private short getByte(int pos) { return Unsigned5.this.getByte(pos); }
|
||||
}
|
||||
|
||||
// UNSIGNED5::read_uint (no position update)
|
||||
public long readUint(int pos) {
|
||||
if (!hasNext(pos)) return -1;
|
||||
return readUint(this, pos, Unsigned5::getByte, (a,i)->{});
|
||||
}
|
||||
private boolean hasNext(int pos) {
|
||||
// 1. there must be a non-excluded byte at the read position
|
||||
// 2. the position must be less than any non-zero limit
|
||||
return ((X == 0 || getByte(pos) >= X) &&
|
||||
(limit == 0 || pos < limit));
|
||||
}
|
||||
|
||||
// debug.cpp: u5decode(intptr_t addr)
|
||||
public void print() {
|
||||
printOn(System.out);
|
||||
}
|
||||
public void printOn(PrintStream tty) {
|
||||
tty.print("U5 " + readUint(0) + ", ");
|
||||
}
|
||||
|
||||
// debug.cpp: u5p(intptr_t addr, intptr_t limit, int count)
|
||||
// check and decode a series of u5 values
|
||||
// return the address after the last decoded byte
|
||||
// if limit is non-zero stop before limit
|
||||
// if count is non-negative stop when count is reached
|
||||
// if count is negative stop on null (works kind of like strlen)
|
||||
public void dumpOn(PrintStream tty, int count) {
|
||||
Reader r = new Reader();
|
||||
int printed = 0;
|
||||
tty.print("U5: [");
|
||||
for (;;) {
|
||||
if (count >= 0 && printed >= count) break;
|
||||
if (!r.hasNext()) {
|
||||
if ((r.position < limit || limit == 0) && getByte(r.position) == 0) {
|
||||
tty.print(" null");
|
||||
++r.position; // skip null byte
|
||||
++printed;
|
||||
if (limit != 0) continue; // keep going to explicit limit
|
||||
}
|
||||
break;
|
||||
}
|
||||
int value = (int) r.nextUint();
|
||||
tty.print(" ");
|
||||
tty.print(value);
|
||||
++printed;
|
||||
}
|
||||
tty.println(" ] (values=" + printed + "/length=" + r.position + ")");
|
||||
}
|
||||
public void dump(int count) {
|
||||
dumpOn(System.out, count);
|
||||
}
|
||||
public void dump() {
|
||||
// dump as many as possible, up to any nonzero limit
|
||||
dumpOn(System.out, -1);
|
||||
}
|
||||
}
|
303
test/hotspot/gtest/utilities/test_unsigned5.cpp
Normal file
303
test/hotspot/gtest/utilities/test_unsigned5.cpp
Normal file
@ -0,0 +1,303 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "memory/allocation.hpp"
|
||||
#include "utilities/unsigned5.hpp"
|
||||
#include "unittest.hpp"
|
||||
|
||||
// Checks encoded_length() and fits_in_limit() around the largest value
// encodable in each byte-length.
TEST_VM(unsigned5, max_encoded_in_length) {
  const int maxlen = UNSIGNED5::MAX_LENGTH;
  EXPECT_EQ(maxlen, 5);
  // Values in [0..190] take one byte; their complements take the maximum.
  for (uint32_t small = 0; small <= 190; small++) {
    EXPECT_EQ(UNSIGNED5::encoded_length(small), 1);
    EXPECT_EQ(UNSIGNED5::encoded_length(~small), maxlen);
  }
  for (int len = 1; len <= maxlen; len++) {
    const uint32_t edge = UNSIGNED5::max_encoded_in_length(len);
    EXPECT_EQ(UNSIGNED5::encoded_length(edge-1), len);
    EXPECT_EQ(UNSIGNED5::encoded_length(edge), len);
    if (len < 5) {
      // just past the edge, one more byte is needed
      EXPECT_EQ(UNSIGNED5::encoded_length(edge+1), len+1);
      EXPECT_EQ(UNSIGNED5::encoded_length(edge*2), len+1);
    }
    // a limit exactly `len` past the offset fits; one less does not
    const int offset = -123;
    const int good_limit = offset + len;
    const int bad_limit = good_limit - 1;
    EXPECT_TRUE(UNSIGNED5::fits_in_limit(edge, offset, good_limit));
    EXPECT_TRUE(!UNSIGNED5::fits_in_limit(edge, offset, bad_limit));
  }
}
|
||||
|
||||
// Call FN on a nice list of "interesting" uint32_t values to encode/decode.
|
||||
// For each length in [1..5], the maximum encodable value of that
|
||||
// length is "interesting", as are one more and one less than that
|
||||
// value. For each nybble (aligned 4-bit field) of a uint32_t, each
|
||||
// possible value (in [0..15]) stored in that nybble is "interesting".
|
||||
// Also "interesting" are some other values created by perturbing
|
||||
// lower bits of that nybble-bearing number, by subtracting a power
|
||||
// of -7 (up to -7^7). That makes just over 1000 distinct numbers.
|
||||
//
|
||||
// Calls to this function are repeatable, so you can call it to pack
|
||||
// an output array, and then call it again to read an input array
|
||||
// verifying that the retrieved values match the stored ones.
|
||||
template<typename FN>
|
||||
inline int enumerate_cases(FN fn) {
|
||||
// boundary values around the maximum encoded in each byte-length
|
||||
for (int len = 1; len <= 5; len++) {
|
||||
uint32_t interesting = UNSIGNED5::max_encoded_in_length(len);
|
||||
int res = fn(interesting-1);
|
||||
if (res) return res;
|
||||
res = fn(interesting);
|
||||
if (res) return res;
|
||||
if (interesting < (uint32_t)-1) {
|
||||
res = fn(interesting+1);
|
||||
if (res) return res;
|
||||
}
|
||||
}
|
||||
// for each nybble, for each value in the nybble
|
||||
for (uint32_t npos = 0; npos < 32; npos += 4) {
|
||||
for (uint32_t nval = 0; nval <= 15; nval++) {
|
||||
uint32_t interesting = nval << npos;
|
||||
int res = fn(interesting);
|
||||
if (res) return res;
|
||||
// mix in some crazy-looking values: powers of -7 to -7^7
|
||||
for (int pon7 = 1; pon7 < 1000000; pon7 *= -7) {
|
||||
uint32_t interesting2 = interesting - pon7;
|
||||
res = fn(interesting2);
|
||||
if (res) return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Writes each enumerated value by itself into a small buffer, reads it
// back, and checks that the length and the value round-trip.
TEST_VM(unsigned5, transcode_single) {
  const int limit = UNSIGNED5::MAX_LENGTH;
  u_char buffer[limit + 1];
  auto round_trip = [&](uint32_t value) -> uint32_t {
    //printf("case %08X len=%d\n", value, UNSIGNED5::encoded_length(value));
    int offset = 0;
    UNSIGNED5::write_uint(value, buffer, offset, limit);
    // after the write, offset is the number of bytes produced
    const int length = offset;
    EXPECT_TRUE(length <= UNSIGNED5::MAX_LENGTH);
    EXPECT_EQ(length, UNSIGNED5::encoded_length(value)) << "for value=" << value;
    buffer[length] = 0;
    offset = 0;
    const uint32_t check = UNSIGNED5::read_uint(buffer, offset, limit);
    EXPECT_EQ(offset, length) << "for value=" << value;
    EXPECT_EQ(value, check);
    return 0;
  };
  auto stopped = enumerate_cases(round_trip);
  EXPECT_TRUE(!stopped);
}
|
||||
|
||||
// Number of values that enumerate_cases produces when nothing stops it.
static int count_cases() {
  int total = 0;
  enumerate_cases([&total](uint32_t) { ++total; return 0; });
  return total;
}
|
||||
|
||||
// Packs as many enumerated values as fit under a series of limits, then
// reads them back and checks that the same values, count, and length
// are recovered.
TEST_VM(unsigned5, transcode_multiple) {
  const int case_count = count_cases();
  const int limit = 200;
  ASSERT_TRUE(limit < case_count*UNSIGNED5::MAX_LENGTH);
  u_char buffer[limit + 1];
  //printf("%d cases total\n", case_count);  //1166 cases total
  for (int sublimit = limit - 20; sublimit < limit; sublimit++) {
    int offset = 0;
    int count = 0;
    // write each number into an array, stopping at the first misfit
    auto write_case = [&](uint32_t value) -> uint32_t {
      if (UNSIGNED5::fits_in_limit(value, offset, sublimit)) {
        UNSIGNED5::write_uint(value, buffer, offset, sublimit);
        count++;
        return 0;
      }
      return value|1;  // non-zero: tells enumerate_cases to stop
    };
    auto done = enumerate_cases(write_case);
    EXPECT_TRUE(done) << "must have hit the sublimit";
    EXPECT_TRUE(count < case_count);
    int length = offset;
    EXPECT_TRUE(length <= sublimit && length + UNSIGNED5::MAX_LENGTH > sublimit)
                << "length=" << length << " sublimit=" << sublimit;
    // zero the tail of the buffer, including the byte at sublimit
    int fill = length;
    while (fill <= sublimit) {
      buffer[fill++] = 0;
    }
    if (sublimit == limit-1) {
      UNSIGNED5::print_count(case_count + 1, &buffer[0], sublimit);
    }
    //printf("encoded %d values in %d bytes: [[%s]]\n", count, length, buffer);
    // now read it all back
    offset = 0;
    int count2 = 0;
    auto read_back_case = [&](uint32_t value) -> uint32_t {
      int clen = UNSIGNED5::check_length(buffer, offset, sublimit);
      if (clen == 0) {
        return value|1;  // nothing more to read: stop the enumeration
      }
      EXPECT_EQ(clen, UNSIGNED5::encoded_length(value));
      int begin = offset;
      uint32_t check = UNSIGNED5::read_uint(buffer, offset, sublimit);
      EXPECT_EQ(offset, begin + clen);
      EXPECT_EQ(value, check);
      count2++;
      return 0;
    };
    auto done2 = enumerate_cases(read_back_case);
    // reading must stop on the same value, after the same number of items
    EXPECT_EQ(done, done2);
    EXPECT_EQ(count, count2);
    EXPECT_EQ(offset, length);
  }
}
|
||||
|
||||
// Fills ints[0..len) with a fixed pattern of mixed-sign test values:
// index i is multiplied by 1001, or by itself when bit 1 of i is set,
// and the product is complemented for odd i.
inline void init_ints(int len, int* ints) {
  for (int i = 0; i < len; i++) {
    const int factor = (i & 2) ? i : 1001;
    const int flip   = -(i & 1);  // 0 for even i, all ones for odd i
    ints[i] = (i * factor) ^ flip;
  }
}
|
||||
|
||||
// Fetch functor for a char* buffer: returns element i as a uint8_t.
struct MyReaderHelper {
  uint8_t operator()(char* a, int i) const {
    return static_cast<uint8_t>(a[i]);
  }
};
|
||||
using MyReader = UNSIGNED5::Reader<char*, int, MyReaderHelper>;
|
||||
|
||||
// Exercises Sizer, Reader and Writer together: sizes and encodes a list
// of ints, reads them back with and without a limit, copies them through
// a growable Writer, and checks Reader::print_on output.
TEST_VM(unsigned5, reader) {
  const int LEN = 100;
  int ints[LEN];
  init_ints(LEN, ints);
  int i;
  UNSIGNED5::Sizer<> szr;
  for (i = 0; i < LEN; i++) {
    szr.accept_uint(ints[i]);
  }
  //printf("count=%d, size=%d\n", szr.count(), szr.position());
  char buf[LEN * UNSIGNED5::MAX_LENGTH + 1];
  int buflen;
  {
    int pos = 0;
    for (int k = 0; k < LEN; k++) {  // distinct name: do not shadow the outer i
      UNSIGNED5::write_uint(ints[k], buf, pos, 0);
    }
    EXPECT_TRUE(pos+1 < (int)sizeof(buf)) << pos;
    buflen = pos;
    buf[buflen] = 0;
  }
  EXPECT_EQ(szr.position(), buflen);
  // read everything back through an unlimited reader
  MyReader r1(buf);
  i = 0;
  while (r1.has_next()) {
    int x = r1.next_uint();
    int y = ints[i++];
    ASSERT_EQ(x, y) << i;
  }
  ASSERT_EQ(i, LEN);
  // a reader limited to half the buffer must stop early
  MyReader r2(buf, buflen / 2);
  i = 0;
  while (r2.has_next()) {
    int x = r2.next_uint();
    int y = ints[i++];
    ASSERT_EQ(x, y) << i;
  }
  ASSERT_TRUE(i < LEN);
  // copy from reader to writer
  UNSIGNED5::Reader<char*,int> r3(buf);
  int array_limit = 1;
  char* array = new char[array_limit + 1];
  // Frees whichever buffer `array` points to when the test body exits,
  // including the early exits taken by a failing ASSERT_*.
  struct ArrayReleaser {
    char*& _p;
    ~ArrayReleaser() { delete[] _p; }
  } array_releaser{array};
  auto array_grow = [&](int){
    array[array_limit] = 0;
    array_limit += 10;
    //printf("growing array from %d to %d\n", array_limit - 10, array_limit);
    auto na = new char[array_limit + 1];
    strcpy(na, array);
    delete[] array;  // the old buffer has been copied; do not leak it
    array = na;
  };
  UNSIGNED5::Writer<char*,int> w3(array, array_limit);
  while (r3.has_next()) {
    w3.accept_grow(r3.next_uint(), array_grow);
  }
  w3.end_byte();  // we always allocated one more than the limit!
  std::string buf_s(buf, buflen);
  std::string arr_s(array, strlen(array));
  ASSERT_EQ(buf_s, arr_s);

  // try printing:
  {
    char stbuf[1000];
    stringStream st(stbuf, sizeof(stbuf)-1);
    UNSIGNED5::Reader<char*,int> printer(buf);
    printer.print_on(&st, 4, "(", ")");
    std::string st_s(st.base(), st.size());
    char buf2[sizeof(stbuf)];
    // bounded formatting: never write past buf2
    snprintf(buf2, sizeof(buf2), "(%d %d %d %d)", ints[0], ints[1], ints[2], ints[3]);
    std::string exp_s(buf2, strlen(buf2));
    ASSERT_EQ(exp_s, st_s);
  }
}
|
||||
|
||||
// Here is some object code to look at if we want to do a manual
|
||||
// study. One could find the build file named test_unsigned5.o.cmdline
|
||||
// and hand-edit the command line to produce assembly code in
|
||||
// test_unsigned5.s.
|
||||
//
|
||||
// Or, given the two empty "fence functions", one could do a
|
||||
// quick scan like this:
|
||||
//
|
||||
// $ objdump -D $(find build/*release -name test_unsigned5.o) \
|
||||
// | sed -n /start_code_quality/,/end_code_quality/p \
|
||||
// | egrep -B10 bswap # or grep -B20 cfi_endproc
|
||||
|
||||
void start_code_quality_unsigned5() { }
|
||||
|
||||
uint32_t code_quality_max_encoded_in_length(int i) {
|
||||
return UNSIGNED5::max_encoded_in_length(i); // should compile like 5-switch
|
||||
}
|
||||
|
||||
int code_quality_encoded_length(uint32_t x) {
|
||||
return UNSIGNED5::encoded_length(x); // should compile to 4-way comparison
|
||||
}
|
||||
|
||||
int code_quality_check_length(char* a) {
|
||||
return UNSIGNED5::check_length(a, 0); // should compile with fast-path
|
||||
}
|
||||
|
||||
int code_quality_read_int(char* a) {
|
||||
int i = 0;
|
||||
return UNSIGNED5::read_uint(a, i, 0); // should compile with fast-path
|
||||
}
|
||||
|
||||
int code_quality_int_reader(char* a) {
|
||||
MyReader r1(a);
|
||||
if (!r1.has_next()) return -1;
|
||||
return r1.next_uint();
|
||||
}
|
||||
|
||||
int code_quality_int_sizer(int* a, int n) {
|
||||
UNSIGNED5::Sizer<> s;
|
||||
for (int i = 0; i < n; i++) s.accept_uint(a[i]);
|
||||
return s.position();
|
||||
}
|
||||
|
||||
void end_code_quality_unsigned5() { }
|
Loading…
Reference in New Issue
Block a user