diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java b/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java index c7f430b..911ecf9 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_richardstartin.java @@ -52,6 +52,26 @@ public class CalculateAverage_richardstartin { return new String(bytes, StandardCharsets.UTF_8); } + static double parseTemperature(ByteBuffer slice) { + // credit: adapted from spullara's submission + int value = 0; + int negative = 1; + int i = 0; + while (i != slice.limit()) { + byte b = slice.get(i++); + switch (b) { + case '-': + negative = -1; + case '.': + break; + default: + value = 10 * value + (b - '0'); + } + } + value *= negative; + return value / 10.0; + } + @FunctionalInterface interface IndexedStringConsumer { void accept(String value, int index); @@ -60,7 +80,7 @@ public class CalculateAverage_richardstartin { /** Maps text to an integer encoding. Adapted from async-profiler. */ public static class Dictionary { - private static final int ROW_BITS = 7; + private static final int ROW_BITS = 12; private static final int ROWS = (1 << ROW_BITS); private static final int CELLS = 3; private static final int TABLE_CAPACITY = (ROWS * CELLS); @@ -90,10 +110,10 @@ public class CalculateAverage_richardstartin { forEach(this.table, consumer); } - public int encode(long hash, ByteBuffer slice) { + public int encode(int hash, ByteBuffer slice) { Table table = this.table; while (true) { - int rowIndex = (int)(Math.abs(hash) % ROWS); + int rowIndex = Math.abs(hash) % ROWS; Row row = table.rows[rowIndex]; for (int c = 0; c < CELLS; c++) { ByteBuffer storedKey = row.keys.get(c); @@ -111,7 +131,7 @@ public class CalculateAverage_richardstartin { } } table = row.getOrCreateNextTable(this::nextBaseIndex); - hash = Long.rotateRight(hash, ROW_BITS); + hash = Integer.rotateRight(hash, ROW_BITS); } } @@ -207,43 +227,6 @@ public class CalculateAverage_richardstartin { return buffer.limit(); } - private static long hash(ByteBuffer slice) { - long hash = slice.limit() + PRIME_5 + 0x123456789abcdef1L; - int i = 0; - for (; i + Long.BYTES < slice.limit(); i += Long.BYTES) { - hash = hashLong(hash, slice.getLong(i)); - } - long part = 0L; - for (; i < slice.limit(); i++) { - part = (part >>> 8) | ((slice.get(i) & 0xFFL) << 56); - } - hash = hashLong(hash, part); - return mix(hash); - } - - static final long PRIME_1 = 0x9E3779B185EBCA87L; - static final long PRIME_2 = 0xC2B2AE3D27D4EB4FL; - static final long PRIME_3 = 0x165667B19E3779F9L; - static final long PRIME_4 = 0x85EBCA77C2B2AE63L; - static final long PRIME_5 = 0x27D4EB2F165667C5L; - - private static long hashLong(long hash, long k) { - k *= PRIME_2; - k = Long.rotateLeft(k, 31); - k *= PRIME_1; - hash ^= k; - return Long.rotateLeft(hash, 27) * PRIME_1 + PRIME_4; - } - - private static long mix(long hash) { - hash ^= hash >>> 33; - hash *= PRIME_2; - hash ^= hash >>> 29; - hash *= PRIME_3; - hash ^= hash >>> 32; - return hash; - } - static class Page { static final int PAGE_SIZE = 1024; @@ -311,16 +294,12 @@ public class CalculateAverage_richardstartin { ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN); // find the global dictionary code to aggregate, // making this code global allows easy merging - int dictId = dictionary.encode(hash(key), key); + int dictId = dictionary.encode(key.hashCode(), key); offset = nextSeparator + 1; int newLine = findIndexOf(slice, offset, NEW_LINE); // parse the double - // todo do this without allocating a string, could use a fast parsing falgorithm - var bytes = new byte[newLine - offset]; - slice.get(offset, bytes); - var string = new String(bytes, StandardCharsets.US_ASCII); - double d = Double.parseDouble(string); + double d = parseTemperature(slice.slice(offset, newLine - offset)); Page.update(pages, dictId, d); @@ -351,7 +330,7 @@ public class CalculateAverage_richardstartin { protected double[][] compute() { if (min == max) { // fixme - hardcoded to problem size - var pages = new double[1024][]; + var pages = new double[600][]; var slice = slices.get(min); computeSlice(slice, pages); return pages; @@ -368,7 +347,7 @@ public class CalculateAverage_richardstartin { } public static void main(String[] args) throws IOException { - int maxChunkSize = 10 << 20; // 10MiB + int maxChunkSize = 250 << 20; // 250MiB try (var raf = new RandomAccessFile(FILE, "r"); var channel = raf.getChannel()) { long size = channel.size();