From 7bfc7eaec67d35ae8f13f4fe858889420a2d972e Mon Sep 17 00:00:00 2001 From: Roman Musin <995612+roman-r-m@users.noreply.github.com> Date: Sun, 21 Jan 2024 17:01:23 +0000 Subject: [PATCH] Reduce allocations and heap size (#525) * Reduce allocations * Shrink the heap size * Calculate hash when reading name (50-100ms difference) * no need to reverse bytes * bump heap size --- calculate_average_roman-r-m.sh | 3 +- .../onebrc/CalculateAverage_roman_r_m.java | 51 +++++++++++-------- 2 files changed, 31 insertions(+), 23 deletions(-) diff --git a/calculate_average_roman-r-m.sh b/calculate_average_roman-r-m.sh index fe468dc..b5d0b3d 100755 --- a/calculate_average_roman-r-m.sh +++ b/calculate_average_roman-r-m.sh @@ -19,7 +19,6 @@ JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages" # epsilon GC needs enough memory or it makes things worse # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1 -# 2GB seems to be the sweet spot -JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx2G -Xms2G -XX:+AlwaysPreTouch" +JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch" java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java index a7df56e..1a43ae5 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java @@ -82,19 +82,30 @@ public class CalculateAverage_roman_r_m { private void parseName(ByteString station) { long start = offset; - long pattern; long next = UNSAFE.getLong(offset); - while ((pattern = applyPattern(next, SEMICOLON_MASK)) == 0) { - offset += 8; - next = UNSAFE.getLong(offset); + long pattern = applyPattern(next, SEMICOLON_MASK); + int bytes; + if (pattern != 0) { + bytes = Long.numberOfTrailingZeros(pattern) / 8; + offset += bytes; + long h = Long.reverseBytes(next) >>> (8 * (8 - bytes)); + station.hash = (int) (h ^ (h >>> 32)); + } + else { + long h = next; + station.hash = (int) (h ^ (h >>> 32)); + while (pattern == 0) { + offset += 8; + next = UNSAFE.getLong(offset); + pattern = applyPattern(next, SEMICOLON_MASK); + } + bytes = Long.numberOfTrailingZeros(pattern) / 8; + offset += bytes; } - int bytes = Long.numberOfTrailingZeros(pattern) / 8; - offset += bytes; int len = (int) (offset - start); station.offset = start; station.len = len; - station.hash = 0; station.tail = next & ((1L << (8 * bytes)) - 1); offset++; @@ -215,11 +226,9 @@ public class CalculateAverage_roman_r_m { this.ms = ms; } - @Override - public String toString() { - var bytes = new byte[len]; - UNSAFE.copyMemory(null, offset, bytes, Unsafe.ARRAY_BYTE_BASE_OFFSET, len); - return new String(bytes, 0, len); + public String asString(byte[] reusable) { + UNSAFE.copyMemory(null, offset, reusable, Unsafe.ARRAY_BYTE_BASE_OFFSET, len); + return new String(reusable, 0, len); } public ByteString copy() { @@ -243,9 +252,7 @@ public class CalculateAverage_roman_r_m { if (len != that.len) return false; - int i = 0; - - for (; i + 7 < len; i += 8) { + for (int i = 0; i + 7 < len; i += 8) { long l1 = UNSAFE.getLong(offset + i); long l2 = UNSAFE.getLong(that.offset + i); if (l1 != l2) { @@ -257,13 +264,14 @@ public class CalculateAverage_roman_r_m { @Override public int hashCode() { - if (hash == 0) { - long h = UNSAFE.getLong(offset); - h = Long.reverseBytes(h) >>> (8 * Math.max(0, 8 - len)); - hash = (int) (h ^ (h >>> 32)); - } return hash; } + + @Override + public String toString() { + byte[] buf = new byte[100]; + return asString(buf); + } } private static final class ResultRow { @@ -318,10 +326,11 @@ public class CalculateAverage_roman_r_m { } TreeMap toMap() { + byte[] buf = new byte[100]; var result = new TreeMap(); for (int i = 0; i < SIZE; i++) { if (keys[i] != null) { - result.put(keys[i].toString(), values[i]); + result.put(keys[i].asString(buf), values[i]); } } return result;