Implement imperative state machine for floating point parser rather than generic, adaptive loop.

This commit is contained in:
Elliot Barlas 2024-01-05 07:08:31 -08:00 committed by Gunnar Morling
parent a1a9a19324
commit 99b453334c
2 changed files with 36 additions and 28 deletions

View File

@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
# #
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-graalce sdk use java 21.0.1-graalce
JAVA_OPTS="" JAVA_OPTS=""
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas measurements.txt 8 time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_ebarlas measurements.txt 8

View File

@ -16,6 +16,7 @@
package dev.morling.onebrc; package dev.morling.onebrc;
import java.io.IOException; import java.io.IOException;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets; import java.nio.charset.StandardCharsets;
@ -27,7 +28,7 @@ import java.util.TreeMap;
public class CalculateAverage_ebarlas { public class CalculateAverage_ebarlas {
private static final int MAX_KEY_SIZE = 100; private static final int MAX_KEY_SIZE = 100 * 4; // max 4 bytes per UTF-8 char
private static final int HASH_FACTOR = 433; private static final int HASH_FACTOR = 433;
private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive private static final int HASH_TBL_SIZE = 16_383; // range of allowed hash values, inclusive
@ -140,57 +141,63 @@ public class CalculateAverage_ebarlas {
private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) { private static Partition doProcessBuffer(ByteBuffer buffer, boolean first, Stats[] stats) {
var header = first ? null : readHeader(buffer); var header = first ? null : readHeader(buffer);
var readingKey = true; // reading key or value? var keyStart = reallyDoProcessBuffer(buffer, stats);
var footer = keyStart < buffer.position() ? readFooter(buffer, keyStart) : null;
return new Partition(header, footer, stats);
}
private static int reallyDoProcessBuffer(ByteBuffer buffer, Stats[] stats) {
var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key var keyBuf = new byte[MAX_KEY_SIZE]; // buffer for key
var keyPos = 0; // current position in key buffer var keyPos = 0; // current position in key buffer
var keyHash = 0; // accumulating hash of key var keyHash = 0; // accumulating hash of key
var keyStart = buffer.position(); // start of key in buffer used for footer calc var keyStart = buffer.position(); // start of key in buffer used for footer calc
var negative = false; // is value negative? try { // abort with exception to avoid hasRemaining() calls
var val = 0; // accumulating value while (true) {
Stats st = null; var b = buffer.get();
while (buffer.hasRemaining()) {
var b = buffer.get();
if (readingKey) {
if (b != ';') { if (b != ';') {
keyHash = HASH_FACTOR * keyHash + b; keyHash = HASH_FACTOR * keyHash + b;
keyBuf[keyPos++] = b; keyBuf[keyPos++] = b;
} }
else { else {
var idx = keyHash & HASH_TBL_SIZE; var idx = keyHash & HASH_TBL_SIZE;
st = stats[idx]; var st = stats[idx];
if (st == null) { // nothing in table, eagerly claim spot if (st == null) { // nothing in table, eagerly claim spot
st = stats[idx] = newStats(keyBuf, keyPos, keyHash); st = stats[idx] = newStats(keyBuf, keyPos, keyHash);
} }
else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) { else if (!Arrays.equals(st.key, 0, st.key.length, keyBuf, 0, keyPos)) {
st = findInTable(stats, keyHash, keyBuf, keyPos); st = findInTable(stats, keyHash, keyBuf, keyPos);
} }
readingKey = false; var negative = false;
} b = buffer.get(); // digit or dash
} if (b == '-') {
else { negative = true;
if (b == '\n') { b = buffer.get(); // digit after neg
}
var val = b - '0';
b = buffer.get(); // second digit or decimal
if (b != '.') {
val = val * 10 + (b - '0');
buffer.get(); // decimal
}
val = val * 10 + (buffer.get() - '0'); // digit after decimal
buffer.get(); // newline
var v = negative ? -val : val; var v = negative ? -val : val;
st.min = Math.min(st.min, v); st.min = Math.min(st.min, v);
st.max = Math.max(st.max, v); st.max = Math.max(st.max, v);
st.sum += v; st.sum += v;
st.count++; st.count++;
readingKey = true; keyStart = buffer.position(); // preserve line start
keyHash = 0; b = buffer.get(); // first byte of key
val = 0; keyHash = b;
negative = false; keyBuf[0] = b;
keyStart = buffer.position(); keyPos = 1;
keyPos = 0;
}
else if (b == '-') {
negative = true;
}
else if (b != '.') { // skip '.' since fractional tenth unit after decimal point is assumed
val = val * 10 + (b - '0');
} }
} }
} }
var footer = keyStart < buffer.position() ? readFooter(buffer, keyStart) : null; catch (BufferUnderflowException ignore) {
return new Partition(header, footer, stats);
}
return keyStart;
} }
private static Stats findInTable(Stats[] stats, int hash, byte[] key, int len) { // open-addressing scan private static Stats findInTable(Stats[] stats, int hash, byte[] key, int len) { // open-addressing scan