First version - roman_r_m (#193)

* initial commit * - use loop - use mutable object to store results * get rid of regex * Do not allocate measurement objects * MMap + custom double parsing ~ 1:30 (down from ~ 2:05) * HashMap for accumulation and only sort at the end - 1:05 * MMap the whole file * Use graal * no GC * Store results in an array list to avoid double map lookup * Adjust max buf size * Manual parsing number to long * Add --enable-preview * remove buffer size check (has no effect on performance) * fix min & max initialization * do not check for \r * Revert "do not check for \r" This reverts commit 9da1f574bf6261ea49c353488d3b4673cad3ce6e. * Optimise parsing. Now completes in 31 sec down from ~43 * trying to parse numbers faster * use open address hash table instead of the standard HashMap * formatting * Rename the script to match github username (change underscores to slashes) Enable transparent huge pages, seems to improve by ~2 sec * Revert "formatting" This reverts commit 4e90797d2a729ed7385c9000c85cc7e87d935f96. * Revert "use open address hash table instead of the standard HashMap" This reverts commit c784b55f61e48f548b2623e5c8958c9b283cae14. * add prepare_roman-r-m.sh * SWAR tricks to find semicolon (-2 seconds ro run time) * remove time call * fix test * Parallel version (~6.5 seconds)
2024-01-11 11:29:08 +00:00
parent 52c490cc24
commit 4b3f959812
3 changed files with 318 additions and 0 deletions
--- a/calculate_average_roman-r-m.sh
+++ b/calculate_average_roman-r-m.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+#  Copyright 2023 The original authors
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
+java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m
--- a/prepare_roman-r-m.sh
+++ b/prepare_roman-r-m.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+#  Copyright 2023 The original authors
+#
+#  Licensed under the Apache License, Version 2.0 (the "License");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+#
+
+source "$HOME/.sdkman/bin/sdkman-init.sh"
+sdk use java 21.0.1-graal 1>&2
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_roman_r_m.java
@@ -0,0 +1,280 @@
+/*
+ *  Copyright 2023 The original authors
+ *
+ *  Licensed under the Apache License, Version 2.0 (the "License");
+ *  you may not use this file except in compliance with the License.
+ *  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+package dev.morling.onebrc;
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.lang.foreign.ValueLayout;
+import java.nio.channels.FileChannel;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.stream.IntStream;
+
+public class CalculateAverage_roman_r_m {
+
+    public static final int DOT_3_RD_BYTE_MASK = (byte) '.' << 16;
+    private static final String FILE = "./measurements.txt";
+    private static MemorySegment ms;
+
+    // based on http://0x80.pl/notesen/2023-03-06-swar-find-any.html
+    static long hasZeroByte(long l) {
+        return ((l - 0x0101010101010101L) & ~(l) & 0x8080808080808080L);
+    }
+
+    static long firstSetByteIndex(long l) {
+        return ((((l - 1) & 0x101010101010101L) * 0x101010101010101L) >> 56) - 1;
+    }
+
+    static long broadcast(byte b) {
+        return 0x101010101010101L * b;
+    }
+
+    static long SEMICOLON_MASK = broadcast((byte) ';');
+    static long LINE_END_MASK = broadcast((byte) '\n');
+
+    static long find(long l, long mask) {
+        long xor = l ^ mask;
+        long match = hasZeroByte(xor);
+        return match != 0 ? firstSetByteIndex(match) : -1;
+    }
+
+    static long nextNewline(long from) {
+        long start = from;
+        long i;
+        long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
+        while ((i = find(next, LINE_END_MASK)) < 0) {
+            start += 8;
+            next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
+        }
+        return start + i;
+    }
+
+    public static void main(String[] args) throws IOException {
+        long fileSize = new File(FILE).length();
+
+        var channel = FileChannel.open(Paths.get(FILE));
+        ms = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.ofAuto());
+
+        int numThreads = fileSize > Integer.MAX_VALUE ? Runtime.getRuntime().availableProcessors() : 1;
+        long chunk = fileSize / numThreads;
+        var result = IntStream.range(0, numThreads)
+                .parallel()
+                .mapToObj(i -> {
+                    boolean lastChunk = i == numThreads - 1;
+                    long chunkStart = i == 0 ? 0 : nextNewline(i * chunk) + 1;
+                    long chunkEnd = lastChunk ? fileSize : nextNewline((i + 1) * chunk);
+
+                    var resultStore = new ResultStore();
+                    var station = new ByteString();
+
+                    long offset = chunkStart;
+                    while (offset < chunkEnd) {
+                        long start = offset;
+                        long pos;
+
+                        if (!lastChunk || chunkEnd - offset >= 8) {
+                            long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
+
+                            while ((pos = find(next, SEMICOLON_MASK)) < 0) {
+                                offset += 8;
+                                if (!lastChunk || fileSize - offset >= 8) {
+                                    next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
+                                }
+                                else {
+                                    while (ms.get(ValueLayout.JAVA_BYTE, offset + pos) != ';') {
+                                        pos++;
+                                    }
+                                    break;
+                                }
+                            }
+                        }
+                        else {
+                            pos = 0;
+                            while (ms.get(ValueLayout.JAVA_BYTE, offset + pos) != ';') {
+                                pos++;
+                            }
+                        }
+                        offset += pos;
+                        int len = (int) (offset - start);
+                        // TODO can we not copy and use a reference into the memory segment to perform table lookup?
+                        MemorySegment.copy(ms, ValueLayout.JAVA_BYTE, start, station.buf, 0, len);
+                        station.len = len;
+                        station.hash = 0;
+
+                        offset++;
+
+                        long val;
+                        boolean neg;
+                        if (!lastChunk || fileSize - offset >= 8) {
+                            long encodedVal = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
+                            neg = (encodedVal & (byte) '-') == (byte) '-';
+                            if (neg) {
+                                encodedVal >>= 8;
+                                offset++;
+                            }
+
+                            if ((encodedVal & DOT_3_RD_BYTE_MASK) == DOT_3_RD_BYTE_MASK) {
+                                val = (encodedVal & 0xFF - 0x30) * 100 + (encodedVal >> 8 & 0xFF - 0x30) * 10 + (encodedVal >> 24 & 0xFF - 0x30);
+                                offset += 5;
+                            }
+                            else {
+                                // based on http://0x80.pl/articles/simd-parsing-int-sequences.html#parsing-and-conversion-of-signed-numbers
+                                val = Long.compress(encodedVal, 0xFF00FFL) - 0x303030;
+                                val = ((val * 2561) >> 8) & 0xff;
+                                offset += 4;
+                            }
+                        }
+                        else {
+                            neg = ms.get(ValueLayout.JAVA_BYTE, offset) == '-';
+                            if (neg) {
+                                offset++;
+                            }
+                            val = ms.get(ValueLayout.JAVA_BYTE, offset++) - '0';
+                            byte b;
+                            while ((b = ms.get(ValueLayout.JAVA_BYTE, offset++)) != '.') {
+                                val = val * 10 + (b - '0');
+                            }
+                            b = ms.get(ValueLayout.JAVA_BYTE, offset);
+                            val = val * 10 + (b - '0');
+                            offset += 2;
+                        }
+
+                        if (neg) {
+                            val = -val;
+                        }
+
+                        var a = resultStore.get(station);
+                        a.min = Math.min(a.min, val);
+                        a.max = Math.max(a.max, val);
+                        a.sum += val;
+                        a.count++;
+                    }
+                    return resultStore.toMap();
+                }).reduce((m1, m2) -> {
+                    m2.forEach((k, v) -> m1.merge(k, v, ResultRow::merge));
+                    return m1;
+                });
+
+        System.out.println(result.get());
+    }
+
+    static final class ByteString {
+
+        private byte[] buf = new byte[100];
+        private int len = 0;
+        private int hash = 0;
+
+        @Override
+        public String toString() {
+            return new String(buf, 0, len);
+        }
+
+        public ByteString copy() {
+            var copy = new ByteString();
+            copy.len = this.len;
+            copy.hash = this.hash;
+            if (copy.buf.length < this.buf.length) {
+                copy.buf = new byte[this.buf.length];
+            }
+            System.arraycopy(this.buf, 0, copy.buf, 0, this.len);
+            return copy;
+        }
+
+        @Override
+        public boolean equals(Object o) {
+            if (this == o)
+                return true;
+            if (o == null || getClass() != o.getClass())
+                return false;
+
+            ByteString that = (ByteString) o;
+
+            if (len != that.len)
+                return false;
+
+            // TODO use Vector
+            for (int i = 0; i < len; i++) {
+                if (buf[i] != that.buf[i]) {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        @Override
+        public int hashCode() {
+            if (hash == 0) {
+                for (int i = 0; i < len; i++) {
+                    hash = 31 * hash + (buf[i] & 255);
+                }
+            }
+            return hash;
+        }
+    }
+
+    private static final class ResultRow {
+        long min = 1000;
+        long sum = 0;
+        long max = -1000;
+        int count = 0;
+
+        public String toString() {
+            return round(min / 10.0) + "/" + round(sum / 10.0 / count) + "/" + round(max / 10.0);
+        }
+
+        private double round(double value) {
+            return Math.round(value * 10.0) / 10.0;
+        }
+
+        public ResultRow merge(ResultRow other) {
+            this.min = Math.min(this.min, other.min);
+            this.max = Math.max(this.max, other.max);
+            this.sum += other.sum;
+            this.count += other.count;
+            return this;
+        }
+    }
+
+    static class ResultStore {
+        private final ArrayList<ResultRow> results = new ArrayList<>(10000);
+        private final Map<ByteString, Integer> indices = new HashMap<>(10000);
+
+        ResultRow get(ByteString s) {
+            var idx = indices.get(s);
+            if (idx != null) {
+                return results.get(idx);
+            }
+            else {
+                ResultRow next = new ResultRow();
+                results.add(next);
+                indices.put(s.copy(), results.size() - 1);
+                return next;
+            }
+        }
+
+        TreeMap<String, ResultRow> toMap() {
+            var result = new TreeMap<String, ResultRow>();
+            indices.forEach((name, idx) -> result.put(name.toString(), results.get(idx)));
+            return result;
+        }
+    }
+}