8292758: put support for UNSIGNED5 format into its own header file

Reviewed-by: dlong, coleenp
This commit is contained in:
John R Rose 2022-09-08 07:36:53 +00:00
parent 6677227301
commit 8d3399bf5f
9 changed files with 1129 additions and 157 deletions

View File

@ -27,36 +27,45 @@
#include "utilities/ostream.hpp"
#include "utilities/moveBits.hpp"
// 32-bit self-inverse encoding of float bits
// converts trailing zeroes (common in floats) to leading zeroes
inline juint CompressedStream::reverse_int(juint i) {
return reverse_bits(i);
}
jint CompressedReadStream::read_signed_int() {
return decode_sign(read_int());
return UNSIGNED5::decode_sign(read_int());
}
// Compressing floats is simple, because the only common pattern
// is trailing zeroes. (Compare leading sign bits on ints.)
// Since floats are left-justified, as opposed to right-justified
// ints, we can bit-reverse them in order to take advantage of int
// compression.
// compression. Since bit reversal converts trailing zeroes to
// leading zeroes, effect is better compression of those common
// 32-bit float values, such as integers or integers divided by
// powers of two, that have many trailing zeroes.
jfloat CompressedReadStream::read_float() {
int rf = read_int();
int f = reverse_int(rf);
int f = reverse_bits(rf);
return jfloat_cast(f);
}
// The treatment of doubles is similar. We could bit-reverse each
// entire 64-bit word, but it is almost as effective to bit-reverse
// the individual halves. Since we are going to encode them
// separately as 32-bit halves anyway, it seems slightly simpler
// to reverse after splitting, and when reading reverse each
// half before joining them together.
jdouble CompressedReadStream::read_double() {
jint rh = read_int();
jint rl = read_int();
jint h = reverse_int(rh);
jint l = reverse_int(rl);
jint h = reverse_bits(rh);
jint l = reverse_bits(rl);
return jdouble_cast(jlong_from(h, l));
}
// A 64-bit long is encoded into distinct 32-bit halves. This saves
// us from having to define a 64-bit encoding and is almost as
// effective. A modified LEB128 could encode longs into 9 bytes, and
// this technique maxes out at 10 bytes, so, if we didn't mind the
// extra complexity of another coding system, we could process 64-bit
// values as single units. But, the complexity does not seem
// worthwhile.
jlong CompressedReadStream::read_long() {
jint low = read_signed_int();
jint high = read_signed_int();
@ -70,26 +79,31 @@ CompressedWriteStream::CompressedWriteStream(int initial_size) : CompressedStrea
}
void CompressedWriteStream::grow() {
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, _size * 2);
int nsize = _size * 2;
const int min_expansion = UNSIGNED5::MAX_LENGTH;
if (nsize < min_expansion*2) {
nsize = min_expansion*2;
}
u_char* _new_buffer = NEW_RESOURCE_ARRAY(u_char, nsize);
memcpy(_new_buffer, _buffer, _position);
_buffer = _new_buffer;
_size = _size * 2;
_size = nsize;
}
void CompressedWriteStream::write_float(jfloat value) {
juint f = jint_cast(value);
juint rf = reverse_int(f);
assert(f == reverse_int(rf), "can re-read same bits");
juint rf = reverse_bits(f);
assert(f == reverse_bits(rf), "can re-read same bits");
write_int(rf);
}
void CompressedWriteStream::write_double(jdouble value) {
juint h = high(jlong_cast(value));
juint l = low( jlong_cast(value));
juint rh = reverse_int(h);
juint rl = reverse_int(l);
assert(h == reverse_int(rh), "can re-read same bits");
assert(l == reverse_int(rl), "can re-read same bits");
juint rh = reverse_bits(h);
juint rl = reverse_bits(l);
assert(h == reverse_bits(rh), "can re-read same bits");
assert(l == reverse_bits(rl), "can re-read same bits");
write_int(rh);
write_int(rl);
}

View File

@ -26,6 +26,7 @@
#define SHARE_CODE_COMPRESSEDSTREAM_HPP
#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"
// Simple interface for filing out and filing in basic types
// Used for writing out and reading in debugging information.
@ -36,18 +37,6 @@ class CompressedStream : public ResourceObj {
u_char* _buffer;
int _position;
enum {
// Constants for UNSIGNED5 coding of Pack200
lg_H = 6, H = 1<<lg_H, // number of high codes (64)
L = (1<<BitsPerByte)-H, // number of low codes (192)
MAX_i = 4 // bytes are numbered in (0..4), max 5 bytes
};
// 32-bit one-to-one sign encoding taken from Pack200
// converts leading sign bits into leading zeroes with trailing sign bit
static juint encode_sign(jint value) { return (value << 1) ^ (value >> 31); }
static jint decode_sign(juint value) { return (value >> 1) ^ -(jint)(value & 1); }
static juint reverse_int(juint i); // to trim trailing float 0's
public:
CompressedStream(u_char* buffer, int position = 0) {
_buffer = buffer;
@ -66,41 +55,6 @@ class CompressedReadStream : public CompressedStream {
private:
inline u_char read() { return _buffer[_position++]; }
// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
// It assumes that most values have lots of leading zeroes.
// Very small values, in the range [0..191], code in one byte.
// Any 32-bit value (including negatives) can be coded, in
// up to five bytes. The grammar is:
// low_byte = [0..191]
// high_byte = [192..255]
// any_byte = low_byte | high_byte
// coding = low_byte
// | high_byte low_byte
// | high_byte high_byte low_byte
// | high_byte high_byte high_byte low_byte
// | high_byte high_byte high_byte high_byte any_byte
// Each high_byte contributes six bits of payload.
// The encoding is one-to-one (except for integer overflow)
// and easy to parse and unparse.
jint read_int_mb(jint b0) {
int pos = position() - 1;
u_char* buf = buffer() + pos;
assert(buf[0] == b0 && b0 >= L, "correctly called");
jint sum = b0;
// must collect more bytes: b[1]...b[4]
int lg_H_i = lg_H;
for (int i = 0; ; ) {
jint b_i = buf[++i]; // b_i = read(); ++i;
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
if (b_i < L || i == MAX_i) {
set_position(pos+i+1);
return sum;
}
lg_H_i += lg_H;
}
}
public:
CompressedReadStream(u_char* buffer, int position = 0)
: CompressedStream(buffer, position) {}
@ -109,14 +63,14 @@ class CompressedReadStream : public CompressedStream {
jbyte read_byte() { return (jbyte ) read(); }
jchar read_char() { return (jchar ) read_int(); }
jshort read_short() { return (jshort ) read_signed_int(); }
jint read_int() { jint b0 = read();
if (b0 < L) return b0;
else return read_int_mb(b0);
}
jint read_signed_int();
jfloat read_float(); // jfloat_cast(reverse_int(read_int()))
jdouble read_double(); // jdouble_cast(2*reverse_int(read_int))
jfloat read_float(); // jfloat_cast(reverse_bits(read_int()))
jdouble read_double(); // jdouble_cast(2*reverse_bits(read_int))
jlong read_long(); // jlong_from(2*read_signed_int())
// Reads one UNSIGNED5-coded 32-bit value starting at _position.
// Decoding is delegated to UNSIGNED5::read_uint, which receives _position
// by reference and advances it past the bytes it consumed.
// The limit argument is 0, which read_uint documents as "no limit check".
jint read_int() {
return UNSIGNED5::read_uint(_buffer, _position, 0);
}
};
@ -134,23 +88,6 @@ class CompressedWriteStream : public CompressedStream {
}
void grow();
// UNSIGNED5 coding, 1-5 byte cases
void write_int_mb(jint value) {
juint sum = value;
for (int i = 0; ; ) {
if (sum < L || i == MAX_i) {
// remainder is either a "low code" or the 5th byte
assert(sum == (u_char)sum, "valid byte");
write((u_char)sum);
break;
}
sum -= L;
int b_i = L + (sum % H); // this is a "high code"
sum >>= lg_H; // extracted 6 bits
write(b_i); ++i;
}
}
protected:
int _size;
@ -163,13 +100,15 @@ class CompressedWriteStream : public CompressedStream {
void write_byte(jbyte value) { write(value); }
void write_char(jchar value) { write_int(value); }
void write_short(jshort value) { write_signed_int(value); }
void write_int(jint value) { if ((juint)value < L && !full())
store((u_char)value);
else write_int_mb(value); }
void write_signed_int(jint value) { write_int(encode_sign(value)); }
void write_float(jfloat value); // write_int(reverse_int(jint_cast(v)))
void write_double(jdouble value); // write_int(reverse_int(<low,high>))
void write_signed_int(jint value) { write_int(UNSIGNED5::encode_sign(value)); }
void write_float(jfloat value); // write_int(reverse_bits(jint_cast(v)))
void write_double(jdouble value); // write_int(reverse_bits(<low,high>))
void write_long(jlong value); // write_signed_int(<low,high>)
// Writes one 32-bit value in UNSIGNED5 format at _position.
// Encoding is delegated to UNSIGNED5::write_uint_grow, with _size passed
// as the (exclusive) limit.  If the value might not fit, write_uint_grow
// invokes the lambda, which ignores its int argument and calls grow();
// _buffer, _position and _size are all passed by reference.
void write_int(juint value) {
UNSIGNED5::write_uint_grow(value, _buffer, _position, _size,
[&](int){ grow(); });
}
};
#endif // SHARE_CODE_COMPRESSEDSTREAM_HPP

View File

@ -61,6 +61,7 @@
#include "utilities/formatBuffer.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"
#include "utilities/unsigned5.hpp"
#include "utilities/vmError.hpp"
#include <stdio.h>
@ -648,6 +649,37 @@ extern "C" JNIEXPORT void findbcp(intptr_t method, intptr_t bcp) {
}
}
// Debugger helper: validate, then decode, the single UNSIGNED5 value
// stored at addr.  At most five bytes are examined.  Returns 0 when
// the bytes do not form a well-formed encoding.
extern "C" JNIEXPORT u4 u5decode(intptr_t addr) {
  Command c("u5decode");
  u1* const bytes = (u1*)addr;
  size_t pos = 0;
  const size_t end = 5;
  const bool well_formed = UNSIGNED5::check_length(bytes, pos, end) != 0;
  return well_formed ? UNSIGNED5::read_uint(bytes, pos, end) : 0;
}
// Sets up a Reader from addr/limit and prints count items.
// A limit of zero means no set limit; stop at the first null
// or after count items are printed.
// A non-zero limit at or below addr denotes an empty range:
// nothing is printed and addr is returned unchanged.
// A count of zero or less is converted to -1, which means
// there is no limit on the count of items printed; the
// printing stops when a null is printed or at limit.
// Returns the address just past the last byte consumed.
// See documentation for UNSIGNED5::Reader::print(count).
extern "C" JNIEXPORT intptr_t u5p(intptr_t addr,
                                  intptr_t limit,
                                  int count) {
  Command c("u5p");
  if (limit != 0 && limit <= addr) {
    // An empty (or inverted) range must not be turned into a relative
    // limit of zero, because zero is the sentinel for "no limit" and
    // would cause an unbounded scan.
    return addr;
  }
  u1* arr = (u1*)addr;
  size_t lim = (limit == 0) ? 0 : (size_t)(limit - addr);
  size_t endpos = UNSIGNED5::print_count(count > 0 ? count : -1,
                                         arr, (size_t)0, lim);
  return addr + endpos;
}
// int versions of all methods to avoid having to type type casts in the debugger
// This overload casts the integer argument to void* and forwards to pp(void*).
void pp(intptr_t p) { pp((void*)p); }

View File

@ -0,0 +1,83 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#include "precompiled.hpp"
#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"
// Most of UNSIGNED5 is in the header file.
// Let's put a few debug functions out-of-line here.
// For the record, UNSIGNED5 was defined around 2001 and was first
// published in the initial Pack200 spec. See:
// https://docs.oracle.com/en/java/javase/11/docs/specs/pack-spec.html
// in Section 6.1, "Encoding of Small Whole Numbers".
PRAGMA_DIAG_PUSH
PRAGMA_FORMAT_NONLITERAL_IGNORED
// For debugging, even in product builds (see debug.cpp).
// Prints `left`, then a series of decoded values separated by spaces,
// then `right`.  `right` is used as a format string and is handed two
// int arguments: the number of items printed and the final position.
//  - count >= 0 bounds the number of items printed; count < 0 means
//    printing stops only at a null byte or at the limit.
//  - A null byte is printed as " null" and skipped; with an explicit
//    limit the scan continues past nulls, without one it stops there.
//  - NULL for left/right selects the defaults shown below.
template<typename ARR, typename OFF, typename GET>
void UNSIGNED5::Reader<ARR,OFF,GET>::
print_on(outputStream* st, int count,
         const char* left,   // "U5: ["
         const char* right   // "] (values=%d/length=%d)\n"
         ) {
  if (left  == NULL)  left  = "U5: [";
  if (right == NULL)  right = "] (values=%d/length=%d)\n";
  int printed = 0;
  st->print("%s", left);
  for (;;) {
    if (count >= 0 && printed >= count)  break;
    if (!has_next()) {
      if ((_limit == 0 || _position < _limit) && _array[_position] == 0) {
        st->print(" null");
        ++_position;  // skip null byte
        ++printed;
        if (_limit != 0)  continue;  // keep going to explicit limit
      }
      break;
    }
    u4 value = next_uint();
    // The value is unsigned; %u keeps values >= 2^31 from being
    // displayed as negative numbers.
    if (printed == 0)
      st->print("%u", value);
    else
      st->print(" %u", value);
    ++printed;
  }
  st->print(right,
            // these arguments may or may not be used in the format string:
            printed,
            (int)_position);
}
PRAGMA_DIAG_POP
// Explicit instantiation for supported types.
template void UNSIGNED5::Reader<char*,int>::
print_on(outputStream* st, int count, const char* left, const char* right);
template void UNSIGNED5::Reader<u1*,int>::
print_on(outputStream* st, int count, const char* left, const char* right);
template void UNSIGNED5::Reader<address,size_t>::
print_on(outputStream* st, int count, const char* left, const char* right);

View File

@ -0,0 +1,419 @@
/*
* Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifndef SHARE_UTILITIES_UNSIGNED5_HPP
#define SHARE_UTILITIES_UNSIGNED5_HPP
#include "memory/allStatic.hpp"
#include "utilities/debug.hpp"
#include "utilities/ostream.hpp"
// Low-level interface for [de-]coding compressed uint32_t (u4) values.
// A uint32_t value (32-bit unsigned int) can be encoded very quickly into
// one to five bytes, and decoded back again, again very quickly.
// This is useful for storing data, like offsets or access flags, that
// is usually simple (fits in fewer bytes usually) but sometimes has
// to be complicated (uses all five bytes when necessary).
// Notable features:
// - represents all 32-bit uint32_t values
// - never reads or writes beyond 5 bytes
// - values up to 0xBE (0x307E/0xC207E/0x308207E) code in 1 byte (2/3/4 bytes)
// - longer encodings are always of larger values (length grows monotonically)
// - encodings are little-endian numerals in a modified base-64 system
// - "negatives" ((u4)-1) need 5 bytes (but see also UNSIGNED5::encode_sign)
// - different encodings decode to different values (excepting overflow)
// - zero bytes are *never* used, so it interoperates with null termination
// - the algorithms are templates and cooperate well with your own types
// - one writer algorithm can grow your resizable buffer on the fly
// The encoding, taken from J2SE Pack200, is called UNSIGNED5.
// It expects the uint32_t values you give it will have many leading zeroes.
//
// More details:
// Very small values, in the range [0..190], code in one byte.
// Any 32-bit value (including negatives) can be coded, in
// up to five bytes. The grammar is:
// low_byte = [1..191]
// high_byte = [192..255]
// any_byte = low_byte | high_byte
// coding = low_byte
// | high_byte low_byte
// | high_byte high_byte low_byte
// | high_byte high_byte high_byte low_byte
// | high_byte high_byte high_byte high_byte any_byte
// Each high_byte contributes six bits of payload.
// The encoding is one-to-one (except for integer overflow)
// and easy to parse and unparse. Longer sequences always
// decode to larger numbers. Sequences of the same length,
// when compared as little-endian numerals, decode to numbers
// which are ordered in the same sense as those numerals.
// Parsing (reading) consists of doing a limit test to see if the byte
// is a low-byte or a high-byte, and also unconditionally adding the
// digit value of the byte, multiplied by its place value (64^^i), to
// an accumulator. The accumulator is returned after either 5 bytes
// are seen, or the first low-byte is seen. Oddly enough, this is
// enough to create a dense var-int format, which is why it was
// adopted for Pack200. By comparison, the more common LEB128 format
// is less dense (for many typical workloads) and does not guarantee a
// length limit.
class UNSIGNED5 : AllStatic {
 private:
  // Math constants for the modified UNSIGNED5 coding of Pack200
  static const int lg_H = 6;                    // log-base-2 of H (lg 64 == 6)
  static const int H = 1<<lg_H;                 // number of "high" bytes (64)
  static const int X = 1;                       // there is one excluded byte ('\0')
  static const int MAX_b = (1<<BitsPerByte)-1;  // largest byte value
  static const int L = (MAX_b+1)-X-H;           // number of "low" bytes (191)

 public:
  static const int MAX_LENGTH = 5;                 // lengths are in [1..5]
  static const uint32_t MAX_VALUE = (uint32_t)-1;  // 2^^32-1

  // The default method for reading and writing bytes is simply
  // b=a[i] and a[i]=b, as defined by this helpful functor.
  template<typename ARR, typename OFF>
  struct ArrayGetSet {
    uint8_t operator()(ARR a, OFF i) const { return a[i]; }
    void operator()(ARR a, OFF i, uint8_t b) const { a[i] = b; }
    // So, an expression ArrayGetSet() acts like these lambdas:
    //auto get = [&](ARR a, OFF i){ return a[i]; };
    //auto set = [&](ARR a, OFF i, uint8_t x){ a[i] = x; };
  };

  // decode a single unsigned 32-bit int from an array-like base address
  // returns the decoded value, updates offset_rw
  // that is, offset_rw is both read and written
  // warning:  caller must ensure there is at least one byte available
  // the limit is either zero meaning no limit check, or an exclusive offset
  // in PRODUCT builds, limit is ignored
  template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
  static uint32_t read_uint(ARR array, OFF& offset_rw, OFF limit, GET get = GET()) {
    const OFF pos = offset_rw;
    STATIC_ASSERT(sizeof(get(array, pos)) == 1);  // must be a byte-getter
    const uint32_t b_0 = (uint8_t) get(array, pos);  //b_0 = a[0]
    assert(b_0 >= X, "avoid excluded bytes");
    uint32_t sum = b_0 - X;
    if (sum < L) {  // common case
      offset_rw = pos + 1;
      return sum;
    }
    // must collect more bytes:  b[1]...b[4]
    int lg_H_i = lg_H;  // lg(H)*i == lg(H^^i)
    for (int i = 1; ; i++) {  // for i in [1..4]
      assert(limit == 0 || pos + i < limit, "oob");
      const uint32_t b_i = (uint8_t) get(array, pos + i);  //b_i = a[i]
      assert(b_i >= X, "avoid excluded bytes");
      sum += (b_i - X) << lg_H_i;  // sum += (b[i]-X)*(64^^i)
      if (b_i < X+L || i == MAX_LENGTH-1) {
        offset_rw = pos + i + 1;
        return sum;
      }
      lg_H_i += lg_H;
    }
  }

  // encode a single unsigned 32-bit int into an array-like span
  // offset_rw is both read and written
  // the limit is either zero meaning no limit check, or an exclusive offset
  // warning:  caller must ensure there is available space
  template<typename ARR, typename OFF, typename SET = ArrayGetSet<ARR,OFF>>
  static void write_uint(uint32_t value, ARR array, OFF& offset_rw, OFF limit, SET set = SET()) {
    const OFF pos = offset_rw;
    if (value < L) {
      const uint32_t b_0 = X + value;
      assert(b_0 == (uint8_t)b_0, "valid byte");
      set(array, pos, (uint8_t)b_0);  //a[0] = b_0
      offset_rw = pos + 1;
      return;
    }
    uint32_t sum = value;
    for (int i = 0; ; i++) {  // for i in [0..4]
      if (sum < L || i == MAX_LENGTH-1) {
        // remainder is either a "low code" or the 5th byte
        uint32_t b_i = X + sum;
        assert(b_i == (uint8_t)b_i, "valid byte");
        set(array, pos + i, (uint8_t)b_i);  //a[i] = b_i
        offset_rw = pos + i + 1;
        return;
      }
      sum -= L;
      uint32_t b_i = X + L + (sum % H);  // this is a "high code"
      assert(b_i == (uint8_t)b_i, "valid byte");
      set(array, pos + i, (uint8_t)b_i);  //a[i] = b_i
      sum >>= lg_H;                 // extracted 6 bits
    }
  }

  // returns the encoded byte length of an unsigned 32-bit int
  static constexpr int encoded_length(uint32_t value) {
    // model the reading of [0..4] high-bytes, followed by a final byte
    // Be careful:  the constexpr magic evaporates if undefined behavior
    // results from any of these expressions.  Beware of signed overflow!
    uint32_t sum = 0;
    uint32_t lg_H_i = 0;
    for (uint32_t i = 0; ; i++) {  // for i in [0..4]
      if (value <= sum + ((L-1) << lg_H_i) || i == MAX_LENGTH-1) {
        return i + 1;  // stopping at byte i implies length is i+1
      }
      sum += (MAX_b - X) << lg_H_i;
      lg_H_i += lg_H;
    }
  }

  // reports the largest uint32_t value that can be encoded using len bytes
  // len must be in the range [1..5]
  static constexpr uint32_t max_encoded_in_length(uint32_t len) {
    assert(len >= 1 && len <= MAX_LENGTH, "invalid length");
    if (len >= MAX_LENGTH)  return MAX_VALUE;  // largest non-overflow value
    // Be careful:  the constexpr magic evaporates if undefined behavior
    // results from any of these expressions.  Beware of signed overflow!
    uint32_t all_combinations = 0;
    uint32_t combinations_i = L;  // L * H^i
    for (uint32_t i = 0; i < len; i++) {
      // count combinations of <H*L> that end at byte i
      all_combinations += combinations_i;
      combinations_i <<= lg_H;
    }
    return all_combinations - 1;
  }

  // tells if a value, when encoded, would fit between the offset and limit
  template<typename OFF>
  static constexpr bool fits_in_limit(uint32_t value, OFF offset, OFF limit) {
    assert(limit != 0, "");
    return (offset + MAX_LENGTH <= limit ||
            offset + encoded_length(value) <= limit);
  }

  // parses one encoded value for correctness and returns the size,
  // or else returns zero if there is a problem (bad limit or excluded byte)
  // the limit is either zero meaning no limit check, or an exclusive offset
  template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
  static int check_length(ARR array, OFF offset, OFF limit = 0,
                          GET get = GET()) {
    const OFF pos = offset;
    STATIC_ASSERT(sizeof(get(array, pos)) == 1);  // must be a byte-getter
    // The first byte is subject to the limit too; without this test a
    // reader positioned exactly at its limit would fetch a byte beyond it.
    if (limit != 0 && pos >= limit)  return 0;  // limit failure
    const uint32_t b_0 = (uint8_t) get(array, pos);  //b_0 = a[0]
    if (b_0 < X+L) {
      return (b_0 < X) ? 0 : 1;
    }
    // parse more bytes:  b[1]...b[4]
    for (int i = 1; ; i++) {  // for i in [1..4]
      if (limit != 0 && pos + i >= limit)  return 0;  // limit failure
      const uint32_t b_i = (uint8_t) get(array, pos + i);  //b_i = a[i]
      if (b_i < X)  return 0;  // excluded byte found
      if (b_i < X+L || i == MAX_LENGTH-1) {
        return i + 1;
      }
    }
  }

  // encode a single unsigned 32-bit int, growing the buffer first if the
  // value might not fit between offset and limit
  // grow is called with MAX_LENGTH and must somehow fix up array and limit,
  // which are passed by reference for that purpose
  template<typename ARR, typename OFF, typename GFN,
           typename SET = ArrayGetSet<ARR,OFF>>
  static void write_uint_grow(uint32_t value,
                              ARR& array, OFF& offset, OFF& limit,
                              GFN grow, SET set = SET()) {
    assert(limit != 0, "limit required");
    const OFF pos = offset;
    if (!fits_in_limit(value, pos, limit)) {
      grow(MAX_LENGTH);  // caller must ensure it somehow fixes array/limit span
      assert(pos + MAX_LENGTH <= limit, "should have grown");
    }
    write_uint(value, array, offset, limit, set);
  }

  /// Handy state machines that will help you with reading,
  /// sizing, and writing (with optional growth).

  // Reader example use:
  //  struct MyReaderHelper {
  //    char operator()(char* a, int i) const { return a[i]; }
  //  };
  //  using MyReader = UNSIGNED5::Reader<char*, int, MyReaderHelper>;
  //  MyReader r(array); while (r.has_next())  print(r.next_uint());
  template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
  class Reader {
    const ARR _array;
    const OFF _limit;
    OFF _position;
    // length in [1..5] of the encoding at the current position, or 0
    // if there is no well-formed encoding there (null byte or limit)
    int next_length() {
      return UNSIGNED5::check_length(_array, _position, _limit, GET());
    }
   public:
    Reader(ARR array, OFF limit = 0)
      : _array(array), _limit(limit) { _position = 0; }
    // decodes the value at the current position and advances past it
    // the caller should have checked has_next() first
    uint32_t next_uint() {
      return UNSIGNED5::read_uint(_array, _position, _limit, GET());
    }
    bool has_next() {
      return next_length() != 0;
    }
    // tries to skip count logical entries; returns actual number skipped
    int try_skip(int count) {
      int actual = 0;
      while (actual < count) {
        int len = next_length();  // 0 or length in [1..5]
        if (len == 0)  break;     // nothing (more) to skip
        _position += len;
        ++actual;                 // one more entry has been skipped
      }
      return actual;
    }
    ARR array() { return _array; }
    OFF limit() { return _limit; }
    OFF position() { return _position; }
    void set_position(OFF position) { _position = position; }

    // For debugging, even in product builds (see debug.cpp).
    // Checks and decodes a series of u5 values from the reader.
    // Sets position just after the last decoded byte or null byte.
    // If this reader has a limit, stop before that limit.
    // If this reader has no limit, stop after the first null byte.
    // In any case, if count is non-negative, print no more than
    // count items (uint32_t values or "null").
    // A negative count means we stop only at the limit or null,
    // kind of like strlen.
    void print(int count = -1) { print_on(tty, count); }

    // The character strings are printed before and after the
    // series of values (which are separated only by spaces).
    // If they are null they default to something like "U5:[ "
    // and " ] (values=%d/length=%d)\n".
    // The %d formats are for the number of printed items and
    // their length in bytes, if you want to see that also.
    void print_on(outputStream* st, int count = -1,
                  const char* left = NULL, const char* right = NULL);
  };

  // Writer example use
  //  struct MyWriterHelper {
  //    void operator()(char* a, int i, char b) const { a[i] = b; }
  //  };
  //  using MyWriter = UNSIGNED5::Writer<char*, int, MyWriterHelper>;
  //  MyWriter w(array);
  //  for (auto i = ...)  w.accept_uint(i);
  template<typename ARR, typename OFF, typename SET = ArrayGetSet<ARR,OFF>>
  class Writer {
    ARR& _array;
    OFF* const _limit_ptr;
    OFF _position;
   public:
    Writer(const ARR& array)
      : _array(const_cast<ARR&>(array)), _limit_ptr(NULL), _position(0) {
      // Note: if _limit_ptr is NULL, the ARR& is never reassigned,
      // because has_limit is false.  So the const_cast here is safe.
      assert(!has_limit(), "this writer cannot be growable");
    }
    Writer(ARR& array, OFF& limit)
      : _array(array), _limit_ptr(&limit), _position(0) {
      // Writable array argument can be rewritten by accept_grow.
      // So we need a legitimate (non-zero) limit to work with.
      // As a result, a writer's initial buffer must not be empty.
      assert(this->limit() != 0, "limit required");
    }
    // writes one value at the current position and advances past it
    // the caller must ensure that there is enough room
    void accept_uint(uint32_t value) {
      const OFF lim = has_limit() ? limit() : 0;
      UNSIGNED5::write_uint(value, _array, _position, lim, SET());
    }
    // like accept_uint, but calls grow first if the value might not fit
    template<typename GFN>
    void accept_grow(uint32_t value, GFN grow) {
      assert(has_limit(), "must track growing limit");
      UNSIGNED5::write_uint_grow(value, _array, _position, *_limit_ptr,
                                 grow, SET());
    }
    // Ensure that remaining() >= r, grow if needed.  Suggested
    // expression for r is (n*MAX_LENGTH)+1, where n is the number of
    // values you are about to write.
    // The grow function is handed the (positive) number of bytes
    // by which the request exceeds what is currently available.
    template<typename GFN>
    void ensure_remaining_grow(int request_remaining, GFN grow) {
      const OFF have = remaining();
      if (have < request_remaining) {
        grow(request_remaining - have);  // caller must fix array/limit span
        assert(remaining() >= request_remaining, "should have grown");
      }
    }
    // use to add a terminating null or other data
    void end_byte(uint8_t extra_byte = 0) {
      SET()(_array, _position++, extra_byte);
    }
    ARR array() { return _array; }
    OFF position() { return _position; }
    void set_position(OFF position) { _position = position; }
    bool has_limit() { return _limit_ptr != NULL; }
    OFF limit() { assert(has_limit(), "needs limit"); return *_limit_ptr; }
    OFF remaining() { return limit() - position(); }
  };

  // Sizer example use
  //  UNSIGNED5::Sizer s;
  //  for (auto i = ...)  s.accept_uint(i);
  //  printf("%d items occupying %d bytes", s.count(), s.position());
  //  auto buf = new char[s.position() + 1];
  //  UNSIGNED5::Writer<char*, int> w(buf);
  //  for (auto i = ...)  w.accept_uint(i);
  //  w.end_byte();
  //  assert(w.position() == s.position() + 1, "s and w agree, plus end byte");
  template<typename OFF = int>
  class Sizer {
    OFF _position;
    int _count;
   public:
    Sizer() { _position = 0; _count = 0; }
    // The accept_uint() API is the same as for Writer, which allows
    // templated code to work equally well on sizers and writers.
    // This in turn makes it easier to write code which runs a
    // sizing preflight pass before actually storing the data.
    void accept_uint(uint32_t value) {
      _position += encoded_length(value);
      _count++;
    }
    OFF position() { return _position; }
    int count() { return _count; }
  };

  // 32-bit one-to-one sign encoding taken from Pack200
  // converts leading sign bits into leading zeroes with trailing sign bit
  // use this to better compress 32-bit values that might be negative
  static uint32_t encode_sign(int32_t value) { return ((uint32_t)value << 1) ^ (value >> 31); }
  static int32_t decode_sign(uint32_t value) { return (value >> 1) ^ -(int32_t)(value & 1); }

  // prints (on tty) all the values between offset and limit, or up to the
  // first null if limit is zero; returns the position where printing stopped
  template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
  static OFF print(ARR array, OFF offset = 0, OFF limit = 0,
                   GET get = GET()) {
    return print_count(-1, array, offset, limit, get);
  }

  // like print, but prints no more than count items if count is non-negative
  // returns the position where printing stopped
  template<typename ARR, typename OFF, typename GET = ArrayGetSet<ARR,OFF>>
  static OFF print_count(int count,
                         ARR array, OFF offset = 0, OFF limit = 0,
                         GET get = GET()) {
    // The Reader constructor takes (array, limit); the starting
    // offset has to be installed separately.
    Reader<ARR,OFF,GET> r(array, limit);
    r.set_position(offset);
    r.print_on(tty, count);
    return r.position();
  }
};
#endif // SHARE_UTILITIES_UNSIGNED5_HPP

View File

@ -24,7 +24,10 @@
package sun.jvm.hotspot.code;
import java.io.PrintStream;
import sun.jvm.hotspot.debugger.*;
import sun.jvm.hotspot.utilities.*;
public class CompressedReadStream extends CompressedStream {
/** Equivalent to CompressedReadStream(buffer, 0) */
@ -56,16 +59,6 @@ public class CompressedReadStream extends CompressedStream {
return decodeSign(readInt());
}
public int readInt() {
int b0 = read();
if (b0 < L) {
return b0;
} else {
return readIntMb(b0);
}
}
public float readFloat() {
return Float.intBitsToFloat(reverseInt(readInt()));
}
@ -85,41 +78,13 @@ public class CompressedReadStream extends CompressedStream {
}
//--------------------------------------------------------------------------------
// Internals only below this point
//
// This encoding, called UNSIGNED5, is taken from J2SE Pack200.
// It assumes that most values have lots of leading zeroes.
// Very small values, in the range [0..191], code in one byte.
// Any 32-bit value (including negatives) can be coded, in
// up to five bytes. The grammar is:
// low_byte = [0..191]
// high_byte = [192..255]
// any_byte = low_byte | high_byte
// coding = low_byte
// | high_byte low_byte
// | high_byte high_byte low_byte
// | high_byte high_byte high_byte low_byte
// | high_byte high_byte high_byte high_byte any_byte
// Each high_byte contributes six bits of payload.
// The encoding is one-to-one (except for integer overflow)
// and easy to parse and unparse.
private int readIntMb(int b0) {
int pos = position - 1;
int sum = b0;
// must collect more bytes: b[1]...b[4]
int lg_H_i = lg_H;
for (int i = 0; ;) {
int b_i = read(pos + (++i));
sum += b_i << lg_H_i; // sum += b[i]*(64**i)
if (b_i < L || i == MAX_i) {
setPosition(pos+i+1);
return sum;
}
lg_H_i += lg_H;
}
public int readInt() {
// UNSIGNED5::read_uint(_buffer, &_position, limit=0)
return (int) Unsigned5.readUint(this, position,
// bytes are fetched here:
CompressedReadStream::read,
// updated position comes through here:
CompressedReadStream::setPosition);
}
private short read(int index) {
@ -132,4 +97,22 @@ public class CompressedReadStream extends CompressedStream {
++position;
return retval;
}
/**
* Dumps the stream, making an assumption that all items are encoded
* as UNSIGNED5. The sizeLimit argument tells the dumper when to
* stop trying to read bytes; if it is zero, the dumper goes as long
* as it can until it encounters a null byte.
*
* This class mixes UNSIGNED5 with other formats. Stray bytes are
* decoded either as "null" (0x00), one less than the byte value
* (0x01..0xBF) or as part of a spurious multi-byte encoding.
* Proceed with caution.
*/
// Convenience overload: dumps to System.out with a sizeLimit of zero.
public void dump() { dumpOn(System.out, 0); }
// Convenience overload: dumps to System.out with the given sizeLimit.
public void dump(int sizeLimit) { dumpOn(System.out, sizeLimit); }
// Wraps this stream's buffer in an Unsigned5 built with sizeLimit and
// asks it to dump onto the given stream, passing -1 as the second argument.
public void dumpOn(PrintStream tty, int sizeLimit) {
new Unsigned5(buffer, sizeLimit).dumpOn(tty, -1);
}
}

View File

@ -25,6 +25,7 @@
package sun.jvm.hotspot.code;
import sun.jvm.hotspot.debugger.*;
import sun.jvm.hotspot.utilities.*;
/** NOTE that this class takes the address of a buffer. This means
that it can read previously-generated debug information directly
@ -54,12 +55,6 @@ public class CompressedStream {
// Byte geometry: 2^3 == 8 bits per byte.
public static final int LogBitsPerByte = 3;
public static final int BitsPerByte = 1 << 3;
// Constants for UNSIGNED5 coding of Pack200
// NOTE(review): these describe a coding with 192 low codes and no excluded
// byte; the Unsigned5 utility class uses X=1 and L=191 instead. Confirm
// which set a given decoder is expected to use.
public static final int lg_H = 6; // log-base-2 of H
public static final int H = 1<<lg_H; // number of high codes (64)
public static final int L = (1<<BitsPerByte) - H; // number of low codes (192)
public static final int MAX_i = 4; // bytes are numbered in (0..4)
// Positioning
public int getPosition() {
return position;
@ -68,24 +63,17 @@ public class CompressedStream {
this.position = position;
}
// 32-bit one-to-one sign encoding taken from Pack200
// converts leading sign bits into leading zeros with trailing sign bit
/**
 * Maps a signed 32-bit value to an encoding whose leading sign bits
 * become leading zeros, with the sign carried in the lowest bit.
 * Delegates to Unsigned5.encodeSign so there is a single definition
 * (and a single reachable return statement).
 *
 * @param value the signed value to encode
 * @return the sign-encoded value
 */
public int encodeSign(int value) {
  return Unsigned5.encodeSign(value);
}
/**
 * Inverse of encodeSign: recovers the signed 32-bit value from its
 * sign-encoded form. Delegates to Unsigned5.decodeSign so there is a
 * single definition (and a single reachable return statement).
 *
 * @param value the sign-encoded value
 * @return the decoded signed value
 */
public int decodeSign(int value) {
  return Unsigned5.decodeSign(value);
}
// 32-bit self-inverse encoding of float bits
// converts trailing zeros (common in floats) to leading zeros
/**
 * Reverses the order of the 32 bits of i (bit 0 swaps with bit 31, and
 * so on). The operation is its own inverse.
 *
 * Uses Integer.reverse rather than a hand-written swap network: the
 * previous hand-written version shifted by 3 instead of 2 in its
 * two-bit swap step, so it was not a true bit reversal (for example it
 * mapped 1 to 0x01000000 instead of 0x80000000).
 *
 * @param i the value whose bits are to be reversed
 * @return i with its bit order reversed
 */
public int reverseInt(int i) {
  return Integer.reverse(i);
}
}

View File

@ -0,0 +1,211 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
package sun.jvm.hotspot.utilities;
import java.io.PrintStream;
import sun.jvm.hotspot.debugger.*;
/**
* Decompression algorithm from utilities/unsigned5.hpp.
*/
public class Unsigned5 {
  // This class has two roles:
  //  - static helpers (readUint, encodeSign, decodeSign) that decode a
  //    single value through caller-supplied byte access, and
  //  - an instance that holds a base Address plus an optional limit, with
  //    convenience methods for reading and printing values stored there.

  public static final int LogBitsPerByte = 3;
  public static final int BitsPerByte = 1 << LogBitsPerByte;

  // Constants for UNSIGNED5 coding of Pack200
  private static final int lg_H = 6;     // log-base-2 of H (lg 64 == 6)
  private static final int H = 1<<lg_H;  // number of "high" bytes (64)
  private static final int X = 1;        // there is one excluded byte ('\0')
  private static final int MAX_b = (1<<BitsPerByte)-1;  // largest byte value
  private static final int L = (MAX_b+1)-X-H;  // number of "low" bytes (191)
  public static final int MAX_LENGTH = 5;  // lengths are in [1..5]

  // Note:  Previous versions of HotSpot used X=0 (not 1) and L=192 (not 191)
  //
  // Using this SA code on old versions of HotSpot, or older SA code
  // on newer versions of HotSpot, will decode compressed data
  // wrongly.  One might consider using vmStructs to communicate this
  // particular change between the SA and VM, but it is mostly futile.
  // There are a myriad of new changes in any version of HotSpot.  You
  // have to use the right SA and VM versions together.

  /**
   * Byte-fetch callback used by the static readUint. The decoder compares
   * the result against small non-negative constants, so implementations
   * are expected to return the byte as an unsigned value.
   */
  public interface GetByte<ARR> {
    short getByte(ARR array, int position);
  }

  /**
   * Callback through which the static readUint reports the position
   * reached after decoding (Java has no by-reference position argument).
   */
  public interface SetPosition<ARR> {
    void setPosition(ARR array, int position);
  }

  // UNSIGNED5::read_uint(_buffer, &_position, limit=0)
  // In C++ this is a generic algorithm, templated with "holes"
  // for array (ARR), offset (OFF), and fetch behavior (GET).
  // In addition, the position is updated by reference.
  // Let us mimic these conditions with two lambdas, both
  // on the ARR parameter.  We will hardwire the position
  // type (OFF) to int (sorry, not long), and omit the extra
  // limit feature.
  /**
   * Decodes one UNSIGNED5 value starting at position.
   *
   * @param base        opaque holder handed back to both callbacks
   * @param position    index of the first byte of the encoding
   * @param getByte     fetches the byte at a given index
   * @param setPosition receives the index just past the consumed bytes
   * @return the decoded 32-bit value, zero-extended to a long
   */
  public static
  <ARR> long readUint(ARR base, int position,
                      GetByte<ARR> getByte,
                      SetPosition<ARR> setPosition) {
    int pos = position;
    int b_0 = getByte.getByte(base, pos);
    int sum = b_0 - X;
    // VM throws assert if b0<X.  Here an excluded first byte makes sum == -1,
    // which takes the common-case exit below and comes back as 0xFFFFFFFF
    // (that is, -1 once the caller narrows the result to int).
    if (sum < L) {  // common case
      setPosition.setPosition(base, pos+1);
      return Integer.toUnsignedLong(sum);
    }
    // must collect more bytes:  b[1]...b[4]
    int lg_H_i = lg_H;  // lg(H)*i == lg(H^^i)
    for (int i = 1; ; i++) {  // for i in [1..4]
      int b_i = getByte.getByte(base, pos + i);
      if (b_i < X) {  // avoid excluded bytes
        // VM throws assert here; should not happen
        setPosition.setPosition(base, pos+i);  // do not consume the bad byte
        return Integer.toUnsignedLong(sum);  // return whatever we have parsed so far
      }
      // int arithmetic wraps here, which matches 32-bit unsigned accumulation
      sum += (b_i - X) << lg_H_i;  // sum += (b[i]-X)*(64^^i)
      if (b_i < X+L || i == MAX_LENGTH-1) {
        setPosition.setPosition(base, pos+i+1);
        return Integer.toUnsignedLong(sum);
      }
      lg_H_i += lg_H;
    }
  }

  // 32-bit one-to-one sign encoding taken from Pack200
  // converts leading sign bits into leading zeros with trailing sign bit
  // uint32_t encode_sign(int32_t value)
  public static int encodeSign(int value) {
    return (value << 1) ^ (value >> 31);
  }

  // int32_t decode_sign(uint32_t value)
  public static int decodeSign(int value) {
    return (value >>> 1) ^ -(value & 1);
  }

  //--------------------------------------------------------------------------------
  // constructor and instance methods for convenience

  // You can read and print a stream directly from memory if you like.
  // First wrap these up, then call read or print.
  private final Address base;
  private final int limit;

  // There is no C++ instance of UNSIGNED5 but it seems useful to
  // allow this class to serve as a holder for an address and optional
  // limit, to point at a place where U5 encodings might be stored.
  // Compare with Unsigned5::Reader(ARR array, OFF limit = 0).
  public Unsigned5(Address base) {
    this(base, 0);  // limit=0 means unlimited (proceed with caution)
  }

  public Unsigned5(Address base, int limit) {
    this.base = base;
    this.limit = limit;
  }

  public Address base() { return base; }

  /** Fetches the single byte at offset pos from the base address, unsigned. */
  public short getByte(int pos) {
    return (short) base.getCIntegerAt(pos, 1, true);
  }

  // An UNSIGNED5::Reader gadget has a settable, auto-incremented
  // position field and can read through a stream of encoded values.
  // Java can model this as an inner class:  var r = myU5.new Reader()
  // or var r = new Unsigned5(myaddr).new Reader()
  public class Reader {
    private int position = 0;  // this is for Unsigned5::Reader behavior
    public int position() { return position; }
    public void setPosition(int pos) { position = pos; }

    // UNSIGNED5::Reader::next_uint
    // Returns -1 (as a long) when hasNext() is false; otherwise decodes the
    // value at the current position and advances past it.
    public long nextUint() {
      if (!hasNext())  return -1;
      return readUint(this, position, Reader::getByte, Reader::setPosition);
    }

    // UNSIGNED5::Reader::has_next
    public boolean hasNext() { return Unsigned5.this.hasNext(position); }

    // delegate reads to outer object:
    private short getByte(int pos) { return Unsigned5.this.getByte(pos); }
  }

  // UNSIGNED5::read_uint (no position update)
  // Returns -1 (as a long) when there is nothing to read at pos.
  public long readUint(int pos) {
    if (!hasNext(pos))  return -1;
    return readUint(this, pos, Unsigned5::getByte, (a,i)->{});
  }

  private boolean hasNext(int pos) {
    // 1. the position must be less than any non-zero limit
    // 2. there must be a non-excluded byte at the read position
    // The limit is tested first so that no byte at or beyond an explicit
    // limit is ever fetched from the target address.
    if (limit != 0 && pos >= limit) {
      return false;
    }
    return X == 0 || getByte(pos) >= X;
  }

  // debug.cpp: u5decode(intptr_t addr)
  public void print() {
    printOn(System.out);
  }

  // Prints the single value found at offset 0.
  public void printOn(PrintStream tty) {
    tty.print("U5 " + readUint(0) + ", ");
  }

  // debug.cpp: u5p(intptr_t addr, intptr_t limit, int count)
  // check and decode a series of u5 values
  // return the address after the last decoded byte
  // if limit is non-zero stop before limit
  // if count is non-negative stop when count is reached
  // if count is negative stop on null (works kind of like strlen)
  public void dumpOn(PrintStream tty, int count) {
    Reader r = new Reader();
    int printed = 0;
    tty.print("U5: [");
    for (;;) {
      if (count >= 0 && printed >= count)  break;
      if (!r.hasNext()) {
        // Either the limit was reached or the next byte is the excluded
        // null byte; only the latter is printed and skipped.
        if ((r.position < limit || limit == 0) && getByte(r.position) == 0) {
          tty.print(" null");
          ++r.position;  // skip null byte
          ++printed;
          if (limit != 0)  continue;  // keep going to explicit limit
        }
        break;
      }
      int value = (int) r.nextUint();
      tty.print(" ");
      tty.print(value);
      ++printed;
    }
    tty.println(" ] (values=" + printed + "/length=" + r.position + ")");
  }

  public void dump(int count) {
    dumpOn(System.out, count);
  }

  public void dump() {
    // dump as many as possible, up to any nonzero limit
    dumpOn(System.out, -1);
  }
}

View File

@ -0,0 +1,303 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include "precompiled.hpp"
#include "memory/allocation.hpp"
#include "utilities/unsigned5.hpp"
#include "unittest.hpp"
// Checks encoded_length() against max_encoded_in_length() at the boundary
// of every byte-length, and checks fits_in_limit() with an exact and a
// one-too-short limit.
TEST_VM(unsigned5, max_encoded_in_length) {
  int longest = UNSIGNED5::MAX_LENGTH;
  EXPECT_EQ(longest, 5);

  // Values 0..190 take one byte; their bitwise complements take all five.
  for (uint32_t small = 0; small <= 190; small++) {
    EXPECT_EQ(UNSIGNED5::encoded_length(small), 1);
    EXPECT_EQ(UNSIGNED5::encoded_length(~small), longest);
  }

  for (int nbytes = 1; nbytes <= longest; nbytes++) {
    const uint32_t top = UNSIGNED5::max_encoded_in_length(nbytes);
    EXPECT_EQ(UNSIGNED5::encoded_length(top - 1), nbytes);
    EXPECT_EQ(UNSIGNED5::encoded_length(top), nbytes);
    if (nbytes < 5) {
      // anything past the maximum must spill into one more byte
      EXPECT_EQ(UNSIGNED5::encoded_length(top + 1), nbytes + 1);
      EXPECT_EQ(UNSIGNED5::encoded_length(top * 2), nbytes + 1);
    }
    // a negative starting offset is deliberate: only the distance to the
    // limit should matter
    const int start = -123;
    const int exact_limit = start + nbytes;
    const int short_limit = exact_limit - 1;
    EXPECT_TRUE(UNSIGNED5::fits_in_limit(top, start, exact_limit));
    EXPECT_TRUE(!UNSIGNED5::fits_in_limit(top, start, short_limit));
  }
}
// Call FN on a nice list of "interesting" uint32_t values to encode/decode.
// For each length in [1..5], the maximum encodable value of that
// length is "interesting", as are one more and one less than that
// value. For each nybble (aligned 4-bit field) of a uint32_t, each
// possible value (in [0..15]) stored in that nybble is "interesting".
// Also "interesting" are some other values created by perturbing
// lower bits of that nybble-bearing number, by subtracting a power
// of -7 (up to -7^7). That makes just over 1000 distinct numbers.
//
// Calls to this function are repeatable, so you can call it to pack
// an output array, and then call it again to read an input array
// verifying that the retrieved values match the stored ones.
// Feeds a fixed, repeatable sequence of uint32_t test values to fn.
// Enumeration stops at the first call that returns non-zero, and that
// result is returned; otherwise the result is 0.
template<typename FN>
inline int enumerate_cases(FN fn) {
  // Part 1: for each byte-length, the largest value of that length and
  // its two neighbours (the upper neighbour is skipped if it would wrap).
  for (int nbytes = 1; nbytes <= 5; nbytes++) {
    const uint32_t top = UNSIGNED5::max_encoded_in_length(nbytes);
    if (int rc = fn(top - 1))  return rc;
    if (int rc = fn(top))  return rc;
    if (top != (uint32_t)-1) {
      if (int rc = fn(top + 1))  return rc;
    }
  }
  // Part 2: every value of every aligned nybble, plus that value
  // perturbed by each power of -7 below one million in magnitude.
  for (uint32_t shift = 0; shift < 32; shift += 4) {
    for (uint32_t nybble = 0; nybble <= 15; nybble++) {
      const uint32_t seed = nybble << shift;
      if (int rc = fn(seed))  return rc;
      for (int power = 1; power < 1000000; power *= -7) {
        const uint32_t perturbed = seed - power;
        if (int rc = fn(perturbed))  return rc;
      }
    }
  }
  return 0;
}
// Writes each enumerated value alone into a small buffer, reads it back,
// and checks both the length consumed and the value recovered.
TEST_VM(unsigned5, transcode_single) {
  const int limit = UNSIGNED5::MAX_LENGTH;
  u_char buffer[limit + 1];  // one spare byte for the trailing zero

  auto round_trip = [&](uint32_t value) -> uint32_t {
    // encode at the start of the buffer
    int cursor = 0;
    UNSIGNED5::write_uint(value, buffer, cursor, limit);
    int written = cursor;
    EXPECT_TRUE(written <= UNSIGNED5::MAX_LENGTH);
    EXPECT_EQ(written, UNSIGNED5::encoded_length(value)) << "for value=" << value;
    buffer[written] = 0;

    // decode from the start and compare
    cursor = 0;
    uint32_t decoded = UNSIGNED5::read_uint(buffer, cursor, limit);
    EXPECT_EQ(cursor, written) << "for value=" << value;
    EXPECT_EQ(value, decoded);
    return 0;  // never ask the enumeration to stop early
  };

  auto stopped = enumerate_cases(round_trip);
  EXPECT_TRUE(!stopped);
}
// Returns how many values enumerate_cases() produces when never stopped.
static int count_cases() {
  int total = 0;
  // The callback ignores the value and always returns 0 so that the
  // enumeration runs to completion.
  enumerate_cases([&total](uint32_t) { total++; return 0; });
  return total;
}
// Packs as many enumerated values as fit below a series of sub-limits,
// then re-runs the same enumeration to read them back and compare.
TEST_VM(unsigned5, transcode_multiple) {
  int total_cases = count_cases();
  const int limit = 200;
  // the buffer must be too small to hold every case, so the limit is hit
  ASSERT_TRUE(limit < total_cases*UNSIGNED5::MAX_LENGTH);
  u_char buffer[limit + 1];

  for (int sublimit = limit - 20; sublimit < limit; sublimit++) {
    int offset = 0;
    int written = 0;

    // Writing pass: stop (non-zero result) at the first value that no
    // longer fits below the sublimit.
    auto write_case = [&](uint32_t value) -> uint32_t {
      if (UNSIGNED5::fits_in_limit(value, offset, sublimit)) {
        UNSIGNED5::write_uint(value, buffer, offset, sublimit);
        written++;
        return 0;
      }
      return value|1;
    };
    auto done = enumerate_cases(write_case);
    EXPECT_TRUE(done) << "must have hit the sublimit";
    EXPECT_TRUE(written < total_cases);

    int length = offset;
    EXPECT_TRUE(length <= sublimit && length + UNSIGNED5::MAX_LENGTH > sublimit)
      << "length=" << length << " sublimit=" << sublimit;

    // zero-fill everything from the end of the data through the sublimit
    for (int fill = length; fill <= sublimit; fill++) {
      buffer[fill] = 0;
    }
    if (sublimit == limit-1) {
      // exercise the printer once, on the last sublimit only
      UNSIGNED5::print_count(total_cases + 1, &buffer[0], sublimit);
    }

    // Reading pass: the same enumeration must see the same values, and
    // must stop at the same value the writing pass stopped on.
    offset = 0;
    int read_back = 0;
    auto read_back_case = [&](uint32_t value) -> uint32_t {
      int clen = UNSIGNED5::check_length(buffer, offset, sublimit);
      if (clen == 0)  return value|1;
      EXPECT_EQ(clen, UNSIGNED5::encoded_length(value));
      int begin = offset;
      uint32_t decoded = UNSIGNED5::read_uint(buffer, offset, sublimit);
      EXPECT_EQ(offset, begin + clen);
      EXPECT_EQ(value, decoded);
      read_back++;
      return 0;
    };
    auto done2 = enumerate_cases(read_back_case);
    EXPECT_EQ(done, done2);
    EXPECT_EQ(written, read_back);
    EXPECT_EQ(offset, length);
  }
}
// Fills ints[0..len) with a fixed pattern: index i is multiplied by
// itself when bit 1 of i is set and by 1001 otherwise, and the product is
// bitwise-complemented for odd i.
inline void init_ints(int len, int* ints) {
  for (int idx = 0; idx < len; ++idx) {
    const int factor = ((idx & 2) != 0) ? idx : 1001;
    const int product = idx * factor;
    // XOR with -1 is a bitwise complement; XOR with 0 is the identity.
    ints[idx] = ((idx & 1) != 0) ? ~product : product;
  }
}
// Byte-getter functor: returns element i of a char array as an unsigned
// 8-bit value.
struct MyReaderHelper {
  uint8_t operator()(char* a, int i) const {
    const char raw = *(a + i);
    return static_cast<uint8_t>(raw);
  }
};
// Reader instantiated with a char* array, int positions, and
// MyReaderHelper as the explicit byte-getter (the other readers in this
// file rely on the template's default getter instead).
using MyReader = UNSIGNED5::Reader<char*, int, MyReaderHelper>;
// Exercises Sizer, Reader, Writer and Reader::print_on on a stream of
// LEN values produced by init_ints():
//  - the Sizer's predicted size must equal the number of bytes written,
//  - an unlimited Reader must return every value, a limited one fewer,
//  - copying Reader -> growable Writer must reproduce the bytes exactly,
//  - print_on must format the first four values as expected.
TEST_VM(unsigned5, reader) {
  const int LEN = 100;
  int ints[LEN];
  init_ints(LEN, ints);
  int i;
  UNSIGNED5::Sizer<> szr;
  for (i = 0; i < LEN; i++) {
    szr.accept_uint(ints[i]);
  }
  //printf("count=%d, size=%d\n", szr.count(), szr.position());
  char buf[LEN * UNSIGNED5::MAX_LENGTH + 1];
  int buflen;
  {
    int pos = 0;
    for (int j = 0; j < LEN; j++) {
      UNSIGNED5::write_uint(ints[j], buf, pos, 0);
    }
    EXPECT_TRUE(pos+1 < (int)sizeof(buf)) << pos;
    buflen = pos;
    buf[buflen] = 0;  // zero byte marks the end for the readers below
  }
  EXPECT_EQ(szr.position(), buflen);

  // read everything back with no limit
  MyReader r1(buf);
  i = 0;
  while (r1.has_next()) {
    // guard the index so a runaway reader fails the test instead of
    // reading past the end of ints[]
    ASSERT_LT(i, LEN) << "reader produced more values than were written";
    int x = r1.next_uint();
    int y = ints[i++];
    ASSERT_EQ(x, y) << i;
  }
  ASSERT_EQ(i, LEN);

  // read with a limit halfway through: must stop early
  MyReader r2(buf, buflen / 2);
  i = 0;
  while (r2.has_next()) {
    ASSERT_LT(i, LEN) << "limited reader produced more values than were written";
    int x = r2.next_uint();
    int y = ints[i++];
    ASSERT_EQ(x, y) << i;
  }
  ASSERT_TRUE(i < LEN);

  // copy from reader to writer
  UNSIGNED5::Reader<char*,int> r3(buf);
  int array_limit = 1;
  char* array = new char[array_limit + 1];
  auto array_grow = [&](int){
    array[array_limit] = 0;
    array_limit += 10;
    //printf("growing array to %d\n", array_limit);
    auto na = new char[array_limit + 1];
    strcpy(na, array);  // the encoding is handled as a NUL-terminated string
    delete[] array;     // release the outgrown buffer instead of leaking it
    array = na;
  };
  UNSIGNED5::Writer<char*,int> w3(array, array_limit);
  while (r3.has_next()) {
    w3.accept_grow(r3.next_uint(), array_grow);
  }
  w3.end_byte();  // we always allocated one more than the limit!
  std::string buf_s(buf, buflen);
  std::string arr_s(array, strlen(array));
  // arr_s owns a copy now; free the buffer before any assertion can return
  delete[] array;
  ASSERT_EQ(buf_s, arr_s);

  // try printing:
  {
    char stbuf[1000];
    stringStream st(stbuf, sizeof(stbuf)-1);
    UNSIGNED5::Reader<char*,int> printer(buf);
    printer.print_on(&st, 4, "(", ")");
    std::string st_s(st.base(), st.size());
    char buf2[sizeof(stbuf)];
    snprintf(buf2, sizeof(buf2), "(%d %d %d %d)", ints[0], ints[1], ints[2], ints[3]);
    std::string exp_s(buf2, strlen(buf2));
    ASSERT_EQ(exp_s, st_s);
  }
}
// Here is some object code to look at if we want to do a manual
// study. One could find the build file named test_unsigned5.o.cmdline
// and hand-edit the command line to produce assembly code in
// test_unsigned5.s.
//
// Or, given the two empty "fence functions", one could do a
// quick scan like this:
//
// $ objdump -D $(find build/*release -name test_unsigned5.o) \
// | sed -n /start_code_quality/,/end_code_quality/p \
// | egrep -B10 bswap # or grep -B20 cfi_endproc
// Empty fence function: marks the start of the code-quality probes so the
// region between the two fences can be cut out of a disassembly.
void start_code_quality_unsigned5() { }
// Code-quality probe: a bare call to max_encoded_in_length with a
// non-constant argument, kept minimal so its object code is easy to read.
uint32_t code_quality_max_encoded_in_length(int i) {
return UNSIGNED5::max_encoded_in_length(i); // should compile like 5-switch
}
// Code-quality probe: a bare call to encoded_length with a non-constant
// argument, kept minimal so its object code is easy to read.
int code_quality_encoded_length(uint32_t x) {
return UNSIGNED5::encoded_length(x); // should compile to 4-way comparison
}
// Code-quality probe: check_length at offset 0 of a caller-supplied array,
// with no explicit limit argument.
int code_quality_check_length(char* a) {
return UNSIGNED5::check_length(a, 0); // should compile with fast-path
}
// Code-quality probe: read_uint from offset 0 with a limit argument of 0.
// The updated offset in i is discarded; only the decoded value is returned.
int code_quality_read_int(char* a) {
int i = 0;
return UNSIGNED5::read_uint(a, i, 0); // should compile with fast-path
}
// Code-quality probe: builds a MyReader over a, and returns its first
// value, or -1 when has_next() reports nothing to read.
int code_quality_int_reader(char* a) {
MyReader r1(a);
if (!r1.has_next()) return -1;
return r1.next_uint();
}
// Code-quality probe: feeds a[0..n) to a default Sizer and returns the
// Sizer's resulting position.
int code_quality_int_sizer(int* a, int n) {
UNSIGNED5::Sizer<> s;
for (int i = 0; i < n; i++) s.accept_uint(a[i]);
return s.position();
}
// Empty fence function: marks the end of the code-quality probes.
void end_code_quality_unsigned5() { }