Adjust rolling hash function to operate at int-scale rather than byte-scale. Ensure 8-byte alignment in key buffer for faster comparisons. (#523)
This commit is contained in:
parent
d135bd9521
commit
47269cf30b
@ -32,11 +32,10 @@ public class CalculateAverage_ebarlas {
|
|||||||
|
|
||||||
private static final Arena ARENA = Arena.global();
|
private static final Arena ARENA = Arena.global();
|
||||||
|
|
||||||
private static final int MAX_KEY_SIZE = 100;
|
private static final int MAX_KEY_SIZE = 104; // 4 additional bytes to allow for single-int overflow due to padding
|
||||||
private static final int MAX_VAL_SIZE = 5; // -dd.d
|
private static final int MAX_VAL_SIZE = 5; // -dd.d
|
||||||
private static final int MAX_LINE_SIZE = MAX_KEY_SIZE + MAX_VAL_SIZE + 2; // key, semicolon, val, newline
|
private static final int MAX_LINE_SIZE = MAX_KEY_SIZE + MAX_VAL_SIZE + 2; // key, semicolon, val, newline
|
||||||
private static final int HASH_FACTOR = 433;
|
private static final int HASH_TBL_SIZE = 131_071; // range of allowed hash values, inclusive
|
||||||
private static final int HASH_TBL_SIZE = 32_767; // range of allowed hash values, inclusive
|
|
||||||
|
|
||||||
private static final Unsafe UNSAFE = makeUnsafe();
|
private static final Unsafe UNSAFE = makeUnsafe();
|
||||||
|
|
||||||
@ -182,7 +181,6 @@ public class CalculateAverage_ebarlas {
|
|||||||
long lineStart = cursor; // start of key in segment used for footer calc
|
long lineStart = cursor; // start of key in segment used for footer calc
|
||||||
long limit = ms.address() + (complete ? ms.byteSize() : ms.byteSize() - MAX_LINE_SIZE); // stop short of longest line, sweep up at the end
|
long limit = ms.address() + (complete ? ms.byteSize() : ms.byteSize() - MAX_LINE_SIZE); // stop short of longest line, sweep up at the end
|
||||||
while (cursor < limit) { // one line per iteration
|
while (cursor < limit) { // one line per iteration
|
||||||
lineStart = cursor; // preserve line start
|
|
||||||
int keyHash = 0; // key hash code
|
int keyHash = 0; // key hash code
|
||||||
long keyAddr = keyBaseAddr; // address for next int
|
long keyAddr = keyBaseAddr; // address for next int
|
||||||
int keyArrLen = 0; // number of key 4-byte ints
|
int keyArrLen = 0; // number of key 4-byte ints
|
||||||
@ -196,6 +194,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
b2 = (byte) ((n >> 16) & 0xFF);
|
b2 = (byte) ((n >> 16) & 0xFF);
|
||||||
b3 = (byte) ((n >> 24) & 0xFF);
|
b3 = (byte) ((n >> 24) & 0xFF);
|
||||||
if (b0 == ';') { // ...;1.1
|
if (b0 == ';') { // ...;1.1
|
||||||
|
UNSAFE.putInt(keyAddr, 0); // always pad with extra int to facilitate 8-byte aligned comparisons
|
||||||
keyLastBytes = 4;
|
keyLastBytes = 4;
|
||||||
b0 = b1;
|
b0 = b1;
|
||||||
b1 = b2;
|
b1 = b2;
|
||||||
@ -205,10 +204,10 @@ public class CalculateAverage_ebarlas {
|
|||||||
}
|
}
|
||||||
else if (b1 == ';') { // ...a;1.1
|
else if (b1 == ';') { // ...a;1.1
|
||||||
int k = n & 0xFF;
|
int k = n & 0xFF;
|
||||||
UNSAFE.putInt(keyAddr, k);
|
UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment
|
||||||
keyLastBytes = 1;
|
keyLastBytes = 1;
|
||||||
keyArrLen++;
|
keyArrLen++;
|
||||||
keyHash = HASH_FACTOR * keyHash + b0;
|
keyHash += k;
|
||||||
b0 = b2;
|
b0 = b2;
|
||||||
b1 = b3;
|
b1 = b3;
|
||||||
b2 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
b2 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
||||||
@ -217,10 +216,10 @@ public class CalculateAverage_ebarlas {
|
|||||||
}
|
}
|
||||||
else if (b2 == ';') { // ...ab;1.1
|
else if (b2 == ';') { // ...ab;1.1
|
||||||
int k = n & 0xFFFF;
|
int k = n & 0xFFFF;
|
||||||
UNSAFE.putInt(keyAddr, k);
|
UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment
|
||||||
keyLastBytes = 2;
|
keyLastBytes = 2;
|
||||||
keyArrLen++;
|
keyArrLen++;
|
||||||
keyHash = HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1;
|
keyHash += k;
|
||||||
b0 = b3;
|
b0 = b3;
|
||||||
b1 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
b1 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
||||||
b2 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
b2 = (byte) (UNSAFE.getByte(cursor++) & 0xFF);
|
||||||
@ -229,10 +228,10 @@ public class CalculateAverage_ebarlas {
|
|||||||
}
|
}
|
||||||
else if (b3 == ';') { // ...abc;1.1
|
else if (b3 == ';') { // ...abc;1.1
|
||||||
int k = n & 0xFFFFFF;
|
int k = n & 0xFFFFFF;
|
||||||
UNSAFE.putInt(keyAddr, k);
|
UNSAFE.putLong(keyAddr, k); // pad with extra int for comparison alignment
|
||||||
keyLastBytes = 3;
|
keyLastBytes = 3;
|
||||||
keyArrLen++;
|
keyArrLen++;
|
||||||
keyHash = HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1) + b2;
|
keyHash += k;
|
||||||
n = UNSAFE.getInt(cursor);
|
n = UNSAFE.getInt(cursor);
|
||||||
cursor += 4;
|
cursor += 4;
|
||||||
b0 = (byte) (n & 0xFF);
|
b0 = (byte) (n & 0xFF);
|
||||||
@ -245,9 +244,10 @@ public class CalculateAverage_ebarlas {
|
|||||||
UNSAFE.putInt(keyAddr, n);
|
UNSAFE.putInt(keyAddr, n);
|
||||||
keyArrLen++;
|
keyArrLen++;
|
||||||
keyAddr += 4;
|
keyAddr += 4;
|
||||||
keyHash = HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * (HASH_FACTOR * keyHash + b0) + b1) + b2) + b3;
|
keyHash += n;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
keyHash ^= keyHash >>> 13;
|
||||||
var idx = keyHash & HASH_TBL_SIZE;
|
var idx = keyHash & HASH_TBL_SIZE;
|
||||||
var st = stats[idx];
|
var st = stats[idx];
|
||||||
if (st == null) { // nothing in table, eagerly claim spot
|
if (st == null) { // nothing in table, eagerly claim spot
|
||||||
@ -281,6 +281,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
st.max = Math.max(st.max, val);
|
st.max = Math.max(st.max, val);
|
||||||
st.sum += val;
|
st.sum += val;
|
||||||
st.count++;
|
st.count++;
|
||||||
|
lineStart = cursor; // preserve line start
|
||||||
}
|
}
|
||||||
return lineStart - ms.address();
|
return lineStart - ms.address();
|
||||||
}
|
}
|
||||||
@ -289,21 +290,15 @@ public class CalculateAverage_ebarlas {
|
|||||||
if (len1 != len2) {
|
if (len1 != len2) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (len1 == 2) {
|
if (len1 <= 2) {
|
||||||
return UNSAFE.getLong(key1) == UNSAFE.getLong(key2);
|
return UNSAFE.getLong(key1) == UNSAFE.getLong(key2);
|
||||||
}
|
}
|
||||||
if (len1 == 3) {
|
if (len1 <= 4) {
|
||||||
return UNSAFE.getLong(key1) == UNSAFE.getLong(key2) && UNSAFE.getInt(key1 + 8) == UNSAFE.getInt(key2 + 8);
|
|
||||||
}
|
|
||||||
if (len1 == 1) {
|
|
||||||
return UNSAFE.getInt(key1) == UNSAFE.getInt(key2);
|
|
||||||
}
|
|
||||||
if (len1 == 4) {
|
|
||||||
return UNSAFE.getLong(key1) == UNSAFE.getLong(key2) && UNSAFE.getLong(key1 + 8) == UNSAFE.getLong(key2 + 8);
|
return UNSAFE.getLong(key1) == UNSAFE.getLong(key2) && UNSAFE.getLong(key1 + 8) == UNSAFE.getLong(key2 + 8);
|
||||||
}
|
}
|
||||||
for (int i = 0; i < len1; i++) {
|
for (int i = 0; i < len1; i += 2) {
|
||||||
var offset = i << 2;
|
var offset = i << 2;
|
||||||
if (UNSAFE.getInt(key1 + offset) != UNSAFE.getInt(key2 + offset)) {
|
if (UNSAFE.getLong(key1 + offset) != UNSAFE.getLong(key2 + offset)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -324,7 +319,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static Stats newStats(long keyAddr, int keyLen, int keyLastBytes, int hash) {
|
private static Stats newStats(long keyAddr, int keyLen, int keyLastBytes, int hash) {
|
||||||
var bytes = keyLen << 2;
|
var bytes = (keyLen + 1) << 2; // include overflow chunk
|
||||||
long k = UNSAFE.allocateMemory(bytes);
|
long k = UNSAFE.allocateMemory(bytes);
|
||||||
UNSAFE.copyMemory(keyAddr, k, bytes);
|
UNSAFE.copyMemory(keyAddr, k, bytes);
|
||||||
return new Stats(k, keyLen, keyLastBytes, hash);
|
return new Stats(k, keyLen, keyLastBytes, hash);
|
||||||
|
Loading…
Reference in New Issue
Block a user