Continue unrolling and inlining value parser. Make targeted use of ByteBuffer.getInt() instead of ByteBuffer.get(). Switch from GraalVM CE to GraalVM. (#201)
This commit is contained in:
parent
aa0395d01b
commit
c13997c9e0
@ -16,6 +16,6 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
source "$HOME/.sdkman/bin/sdkman-init.sh"
|
source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||||
sdk use java 21.0.1-graalce 1>&2
|
sdk use java 21.0.1-graal 1>&2
|
||||||
JAVA_OPTS=""
|
JAVA_OPTS=""
|
||||||
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas measurements.txt 8
|
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas measurements.txt 8
|
||||||
|
@ -18,6 +18,7 @@ package dev.morling.onebrc;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.BufferUnderflowException;
|
import java.nio.BufferUnderflowException;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
@ -28,7 +29,7 @@ import java.util.TreeMap;
|
|||||||
|
|
||||||
public class CalculateAverage_ebarlas {
|
public class CalculateAverage_ebarlas {
|
||||||
|
|
||||||
private static final int MAX_KEY_SIZE = 100 * 4; // max 4 bytes per UTF-8 char
|
private static final int MAX_KEY_SIZE = 100;
|
||||||
private static final int HASH_FACTOR = 433;
|
private static final int HASH_FACTOR = 433;
|
||||||
private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive
|
private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive
|
||||||
|
|
||||||
@ -52,7 +53,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
var pSize = pEnd - pStart;
|
var pSize = pEnd - pStart;
|
||||||
Runnable r = () -> {
|
Runnable r = () -> {
|
||||||
try {
|
try {
|
||||||
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize);
|
var buffer = channel.map(FileChannel.MapMode.READ_ONLY, pStart, pSize).order(ByteOrder.LITTLE_ENDIAN);
|
||||||
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
|
partitions[pIdx] = processBuffer(buffer, pIdx == 0);
|
||||||
}
|
}
|
||||||
catch (IOException e) {
|
catch (IOException e) {
|
||||||
@ -113,7 +114,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
var merged = mergeFooterAndHeader(pPrev.footer, pNext.header);
|
var merged = mergeFooterAndHeader(pPrev.footer, pNext.header);
|
||||||
if (merged != null) {
|
if (merged != null) {
|
||||||
if (merged[merged.length - 1] == '\n') { // fold into prev partition
|
if (merged[merged.length - 1] == '\n') { // fold into prev partition
|
||||||
doProcessBuffer(ByteBuffer.wrap(merged), true, pPrev.stats);
|
doProcessBuffer(ByteBuffer.wrap(merged).order(ByteOrder.LITTLE_ENDIAN), true, pPrev.stats);
|
||||||
}
|
}
|
||||||
else { // no newline appeared in partition, carry forward
|
else { // no newline appeared in partition, carry forward
|
||||||
pNext.footer = merged;
|
pNext.footer = merged;
|
||||||
@ -142,23 +143,42 @@ public class CalculateAverage_ebarlas {
|
|||||||
private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) {
|
private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) {
|
||||||
var header = first ? null : readHeader(buffer);
|
var header = first ? null : readHeader(buffer);
|
||||||
var keyStart = reallyDoProcessBuffer(buffer, stats);
|
var keyStart = reallyDoProcessBuffer(buffer, stats);
|
||||||
var footer = keyStart < buffer.position() ? readFooter(buffer, keyStart) : null;
|
var footer = keyStart < buffer.limit() ? readFooter(buffer, keyStart) : null;
|
||||||
return new Partition(header, footer, stats);
|
return new Partition(header, footer, stats);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) {
|
private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) {
|
||||||
var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key
|
var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key
|
||||||
var keyPos = 0; // current position in key buffer
|
int keyStart = 0; // start of key in buffer used for footer calc
|
||||||
var keyHash = 0; // accumulating hash of key
|
try { // abort with exception to allow optimistic line processing
|
||||||
var keyStart = buffer.position(); // start of key in buffer used for footer calc
|
while (true) { // one line per iteration
|
||||||
try { // abort with exception to avoid hasRemaining() calls
|
keyStart = buffer.position(); // preserve line start
|
||||||
while (true) {
|
int n = buffer.getInt(); // first four bytes of key
|
||||||
var b = buffer.get();
|
byte b1 = (byte) (n & 0xFF);
|
||||||
if (b != ';') {
|
byte b2 = (byte) ((n >> 8) & 0xFF);
|
||||||
|
byte b3 = (byte) ((n >> 16) & 0xFF);
|
||||||
|
byte b = (byte) ((n >> 24) & 0xFF);
|
||||||
|
int keyPos;
|
||||||
|
int keyHash = keyBuf[0] = b1;
|
||||||
|
if (b2 != ';' && b3 != ';') { // true for keys of length 3 or more
|
||||||
|
keyBuf[1] = b2;
|
||||||
|
keyBuf[2] = b3;
|
||||||
|
keyHash = HASH_FACTOR * (HASH_FACTOR * keyHash + b2) + b3;
|
||||||
|
keyPos = 3;
|
||||||
|
while (b != ';') {
|
||||||
|
keyHash = HASH_FACTOR * keyHash + b;
|
||||||
|
keyBuf[keyPos++] = b;
|
||||||
|
b = buffer.get();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else { // slow path, rewind and consume byte-by-byte
|
||||||
|
buffer.position(keyStart + 1);
|
||||||
|
keyPos = 1;
|
||||||
|
while ((b = buffer.get()) != ';') {
|
||||||
keyHash = HASH_FACTOR * keyHash + b;
|
keyHash = HASH_FACTOR * keyHash + b;
|
||||||
keyBuf[keyPos++] = b;
|
keyBuf[keyPos++] = b;
|
||||||
}
|
}
|
||||||
else {
|
}
|
||||||
var idx = keyHash & HASH_TBL_SIZE;
|
var idx = keyHash & HASH_TBL_SIZE;
|
||||||
var st = stats[idx];
|
var st = stats[idx];
|
||||||
if (st == null) { // nothing in table, eagerly claim spot
|
if (st == null) { // nothing in table, eagerly claim spot
|
||||||
@ -167,31 +187,46 @@ public class CalculateAverage_ebarlas {
|
|||||||
else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
|
else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
|
||||||
st = findInTable(stats, keyHash, keyBuf, keyPos);
|
st = findInTable(stats, keyHash, keyBuf, keyPos);
|
||||||
}
|
}
|
||||||
var negative = false;
|
var value = buffer.getInt();
|
||||||
b = buffer.get(); // digit or dash
|
b = (byte) (value & 0xFF); // digit or dash
|
||||||
if (b == '-') {
|
int val;
|
||||||
negative = true;
|
if (b == '-') { // dash branch
|
||||||
b = buffer.get(); // digit after neg
|
val = ((byte) ((value >> 8) & 0xFF)) - '0'; // digit after dash
|
||||||
|
b = (byte) ((value >> 16) & 0xFF); // second digit or decimal
|
||||||
|
if (b != '.') { // second digit
|
||||||
|
val = val * 10 + (b - '0'); // calc second digit
|
||||||
|
// skip decimal (at >> 24)
|
||||||
|
b = buffer.get(); // digit after decimal
|
||||||
|
val = val * 10 + (b - '0'); // calc digit after decimal
|
||||||
}
|
}
|
||||||
var val = b - '0';
|
else { // decimal branch
|
||||||
b = buffer.get(); // second digit or decimal
|
// skip decimal (at >> 16)
|
||||||
if (b != '.') {
|
b = (byte) ((value >> 24) & 0xFF); // digit after decimal
|
||||||
val = val * 10 + (b - '0');
|
val = val * 10 + (b - '0'); // calc digit after decimal
|
||||||
buffer.get(); // decimal
|
|
||||||
}
|
}
|
||||||
val = val * 10 + (buffer.get() - '0'); // digit after decimal
|
|
||||||
buffer.get(); // newline
|
buffer.get(); // newline
|
||||||
var v = negative ? -val : val;
|
val = -val;
|
||||||
st.min = Math.min(st.min, v);
|
|
||||||
st.max = Math.max(st.max, v);
|
|
||||||
st.sum += v;
|
|
||||||
st.count++;
|
|
||||||
keyStart = buffer.position(); // preserve line start
|
|
||||||
b = buffer.get(); // first byte of key
|
|
||||||
keyHash = b;
|
|
||||||
keyBuf[0] = b;
|
|
||||||
keyPos = 1;
|
|
||||||
}
|
}
|
||||||
|
else { // first digit branch
|
||||||
|
val = b - '0'; // calc first digit
|
||||||
|
b = (byte) ((value >> 8) & 0xFF); // second digit or decimal
|
||||||
|
if (b != '.') { // second digit branch
|
||||||
|
val = val * 10 + (b - '0'); // calc second digit
|
||||||
|
// skip decimal (at >> 16)
|
||||||
|
b = (byte) ((value >> 24) & 0xFF); // digit after decimal
|
||||||
|
val = val * 10 + (b - '0'); // calc digit after decimal
|
||||||
|
buffer.get(); // newline
|
||||||
|
}
|
||||||
|
else { // decimal branch
|
||||||
|
b = (byte) ((value >> 16) & 0xFF); // digit after decimal
|
||||||
|
val = val * 10 + (b - '0'); // calc digit after decimal
|
||||||
|
// skip newline (at >> 24)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
st.min = Math.min(st.min, val);
|
||||||
|
st.max = Math.max(st.max, val);
|
||||||
|
st.sum += val;
|
||||||
|
st.count++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
catch (BufferUnderflowException ignore) {
|
catch (BufferUnderflowException ignore) {
|
||||||
@ -220,7 +255,7 @@ public class CalculateAverage_ebarlas {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static byte[] readFooter(ByteBuffer buffer, int lineStart) { // read from line start to current pos (end-of-input)
|
private static byte[] readFooter(ByteBuffer buffer, int lineStart) { // read from line start to current pos (end-of-input)
|
||||||
var footer = new byte[buffer.position() - lineStart];
|
var footer = new byte[buffer.limit() - lineStart];
|
||||||
buffer.get(lineStart, footer, 0, footer.length);
|
buffer.get(lineStart, footer, 0, footer.length);
|
||||||
return footer;
|
return footer;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user