Next version (#596)

* cleanup prepare script * native image options * fix quadratic probing (no change to perf) * mask to get the last chunk of the name * extract hash functions * tweak the probing loop (-100ms) * fiddle with native image options * Reorder conditions in hope it makes branch predictor happier * extracted constant
2024-01-27 14:17:55 +00:00
parent c228633b57
commit f9c58414da
3 changed files with 60 additions and 43 deletions
--- a/calculate_average_roman-r-m.sh
+++ b/calculate_average_roman-r-m.sh
@@ -15,21 +15,16 @@
 #  limitations under the License.
 #
 JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
 # epsilon GC needs enough memory or it makes things worse
 # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
 JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
 if [ -f target/CalculateAverage_roman_r_m_image ]; then
-    echo "Picking up existing native image 'target/CalculateAverage_roman_r_m_image', delete the file to select JVM mode." 1>&2
+    echo "Running native image 'target/CalculateAverage_roman_r_m_image'." 1>&2
    target/CalculateAverage_roman_r_m_image
 else
-    JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA"
+    JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
-    if [[ ! "$(uname -s)" = "Darwin" ]]; then
+    JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+TrustFinalNonStaticFields -dsa -XX:+UseNUMA"
-        # On OS/X, my machine, this errors:
+    # epsilon GC needs enough memory or it makes things worse
-        JAVA_OPTS="$JAVA_OPTS -XX:+UseTransparentHugePages"
+    # see https://stackoverflow.com/questions/58087596/why-are-repeated-memory-allocations-observed-to-be-slower-using-epsilon-vs-g1
-    fi
+    JAVA_OPTS="$JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xmx1G -Xms1G -XX:+AlwaysPreTouch"
-    echo "Choosing to run the app in JVM mode as no native image was found, use additional_build_step_roman_r_m.sh to generate." 1>&2
+
    echo "Running on JVM" 1>&2
    java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
 fi
--- a/prepare_roman-r-m.sh
+++ b/prepare_roman-r-m.sh
@@ -23,6 +23,8 @@ if [ ! -f target/CalculateAverage_roman_r_m_image ]; then
    JAVA_OPTS="--enable-preview -dsa"
    NATIVE_IMAGE_OPTS="--initialize-at-build-time=dev.morling.onebrc.CalculateAverage_roman_r_m --gc=epsilon -Ob -O3 -march=native --strict-image-heap $JAVA_OPTS"
    NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -R:MaxHeapSize=128m"
    NATIVE_IMAGE_OPTS="$NATIVE_IMAGE_OPTS -H:+UnlockExperimentalVMOptions -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions"
    native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_roman_r_m_image dev.morling.onebrc.CalculateAverage_roman_r_m
 fi
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
@@ -40,6 +40,7 @@ public class CalculateAverage_roman_r_m {
    private static final long SEMICOLON_MASK = broadcast((byte) ';');
    private static final long LINE_END_MASK = broadcast((byte) '\n');
    private static final long DOT_MASK = broadcast((byte) '.');
    private static final long ZEROES_MASK = broadcast((byte) '0');
    // from netty
@@ -64,6 +65,15 @@ public class CalculateAverage_roman_r_m {
        return start + Long.numberOfTrailingZeros(i) / 8;
    }
    /**
     * Folds a full 8-byte word into a 32-bit hash by XOR-ing its upper half
     * into its lower half.
     *
     * @param word the 64-bit value to hash
     * @return the low 32 bits of {@code word ^ (word >>> 32)}
     */
    static int hashFull(long word) {
        // Long.hashCode is specified as (int) (value ^ (value >>> 32)).
        return Long.hashCode(word);
    }
    /**
     * Hashes only the {@code bytes} lowest-order bytes of {@code word}.
     *
     * <p>The word is byte-reversed so that those bytes become the highest-order
     * ones, then shifted right to drop everything else, and the result is
     * folded to 32 bits by XOR-ing its upper half into its lower half.
     *
     * <p>Java masks a {@code long} shift distance to its low six bits, so for
     * {@code bytes == 0} (distance 64) and {@code bytes == 8} (distance 0)
     * nothing is discarded and the whole reversed word is hashed.
     *
     * @param word  the 64-bit value containing the bytes to hash
     * @param bytes how many low-order bytes of {@code word} contribute to the hash
     * @return the 32-bit hash of the retained bytes
     */
    static int hashPartial(long word, int bytes) {
        // Number of bits occupied by the bytes that must not affect the hash.
        int discardedBits = Long.SIZE - (bytes << 3);
        long retained = Long.reverseBytes(word) >>> discardedBits;
        return Long.hashCode(retained);
    }
    public static void main(String[] args) throws Exception {
        Field f = Unsafe.class.getDeclaredField("theUnsafe");
        f.setAccessible(true);
@@ -96,34 +106,37 @@ public class CalculateAverage_roman_r_m {
                        var station = new ByteString(segment);
                        long offset = segment.address();
                        long end = offset + segment.byteSize();
                        long tailMask;
                        while (offset < end) {
                            // parsing station name
                            long start = offset;
                            long next = UNSAFE.getLong(offset);
                            long pattern = applyPattern(next, SEMICOLON_MASK);
                            int bytes;
-                            if (pattern != 0) {
+                            if (pattern == 0) {
-                                bytes = Long.numberOfTrailingZeros(pattern) / 8;
+                                station.hash = hashFull(next);
-                                offset += bytes;
+                                do {
                                long h = Long.reverseBytes(next) >>> (8 * (8 - bytes));
                                station.hash = (int) (h ^ (h >>> 32));
                            }
                            else {
                                long h = next;
                                station.hash = (int) (h ^ (h >>> 32));
                                while (pattern == 0) {
                                    offset += 8;
                                    next = UNSAFE.getLong(offset);
                                    pattern = applyPattern(next, SEMICOLON_MASK);
-                                }
+                                } while (pattern == 0);
                                bytes = Long.numberOfTrailingZeros(pattern) / 8;
                                offset += bytes;
                                tailMask = ((1L << (8 * bytes)) - 1);
                            }
                            else {
                                bytes = Long.numberOfTrailingZeros(pattern) / 8;
                                offset += bytes;
                                tailMask = ((1L << (8 * bytes)) - 1);
                                station.hash = hashPartial(next, bytes);
                            }
                            int len = (int) (offset - start);
                            station.offset = start;
                            station.len = len;
-                            station.tail = next & ((1L << (8 * bytes)) - 1);
+                            station.tail = next & tailMask;
                            offset++;
@@ -140,7 +153,7 @@ public class CalculateAverage_roman_r_m {
                                long numLen = applyPattern(encodedVal, DOT_MASK);
                                numLen = Long.numberOfTrailingZeros(numLen) / 8;
-                                encodedVal ^= broadcast((byte) 0x30);
+                                encodedVal ^= ZEROES_MASK;
                                int intPart = (int) (encodedVal & ((1 << (8 * numLen)) - 1));
                                intPart <<= 8 * (2 - numLen);
@@ -285,24 +298,31 @@ public class CalculateAverage_roman_r_m {
            int h = s.hashCode();
            int idx = (SIZE - 1) & h;
            var keys = this.keys;
            int idx0 = idx;
            int i = 0;
-            while (keys[idx] != null && !keys[idx].equals(s)) {
+            while (true) {
-                i++;
+                if (keys[idx] != null && keys[idx].equals(s)) {
-                idx = (idx + i * i) % SIZE;
+                    values[idx][0] = Math.min(values[idx][0], value);
-            }
+                    values[idx][1] = Math.max(values[idx][1], value);
-            if (keys[idx] == null) {
+                    values[idx][2] += value;
-                keys[idx] = s.copy();
+                    values[idx][3] += 1;
-                values[idx] = new int[4];
+                    return;
-                values[idx][0] = value;
+                }
-                values[idx][1] = value;
+                else if (keys[idx] == null) {
-                values[idx][2] = value;
+                    keys[idx] = s.copy();
-                values[idx][3] = 1;
+                    values[idx] = new int[4];
-            }
+                    values[idx][0] = value;
-            else {
+                    values[idx][1] = value;
-                values[idx][0] = Math.min(values[idx][0], value);
+                    values[idx][2] = value;
-                values[idx][1] = Math.max(values[idx][1], value);
+                    values[idx][3] = 1;
-                values[idx][2] += value;
+                    return;
-                values[idx][3] += 1;
+                }
                else {
                    i++;
                    idx = (idx0 + i * i) % SIZE;
                }
            }
        }