diff --git a/calculate_average_gamlerhart.sh b/calculate_average_gamlerhart.sh new file mode 100755 index 0000000..c52a25b --- /dev/null +++ b/calculate_average_gamlerhart.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview --add-modules=jdk.incubator.vector" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gamlerhart diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java b/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java new file mode 100644 index 0000000..e4398d4 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_gamlerhart.java @@ -0,0 +1,270 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import jdk.incubator.vector.*; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.lang.foreign.ValueLayout; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.util.TreeMap; + +import static java.lang.Double.doubleToRawLongBits; +import static java.lang.Double.longBitsToDouble; +import static java.lang.foreign.ValueLayout.*; + +/** + * Broad experiments in this implementation: + * - Memory-Map the file with new MemorySegments + * - Use SIMD/vectorized search for the semicolon and new line feeds + * - Use SIMD/vectorized comparison for the 'key' + *

+ * Absolute stupid things / performance left on the table + * - Single Threaded! Multi threading planned. + * - The hash map/table is super basic. + * - Hash table implementation / hashing has no resizing and is quite basic + * - Zero time spend on profiling =) + *

+ *

+ * Cheats used: + * - Only works with Unix line feed \n + * - double parsing is only accepting XX.X and X.X + * - HashMap has no resizing, check, horrible hash etc. + * - Used the double parsing from yemreinci + */ +public class CalculateAverage_gamlerhart { + + private static final String FILE = "./measurements.txt"; + final static VectorSpecies byteVec = ByteVector.SPECIES_PREFERRED; + final static Vector zero = byteVec.zero(); + final static int vecLen = byteVec.length(); + final static Vector semiColon = byteVec.broadcast(';'); + final static VectorMask allTrue = byteVec.maskAll(true); + final static ValueLayout.OfInt INT_UNALIGNED_BIG_ENDIAN = ValueLayout.JAVA_INT_UNALIGNED.withOrder(ByteOrder.BIG_ENDIAN); + + public static void main(String[] args) throws Exception { + try (var arena = Arena.ofConfined(); + FileChannel fc = FileChannel.open(Path.of(FILE))) { + var map = new PrivateHashMap(); + long fileSize = fc.size(); + MemorySegment fileContent = fc.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, arena); + + var loopBound = byteVec.loopBound(fileSize) - vecLen; + for (long i = 0; i < fileSize;) { + long nameStart = i; + int simdSearchEnd = 0; + int nameLen = 0; + // Vectorized Search + if (i < loopBound) { + do { + var vec = byteVec.fromMemorySegment(fileContent, i, ByteOrder.BIG_ENDIAN); + var hasSemi = vec.eq(semiColon); + simdSearchEnd = hasSemi.firstTrue(); + i += simdSearchEnd; + nameLen += simdSearchEnd; + } while (simdSearchEnd == vecLen && i < loopBound); + } + // Left-over search + while (loopBound <= i && fileContent.get(JAVA_BYTE, i) != ';') { + nameLen++; + i++; + } + i++; // Consume ; + // Copied from yemreinci. I mostly wanted to experiment the vector math, not with parsing =) + double val; + { + boolean negative = false; + if ((fileContent.get(JAVA_BYTE, i)) == '-') { + negative = true; + i++; + } + byte b; + double temp; + if ((b = fileContent.get(JAVA_BYTE, i + 1)) == '.') { // temperature is in either XX.X or X.X form + temp = (fileContent.get(JAVA_BYTE, i) - '0') + (fileContent.get(JAVA_BYTE, i + 2) - '0') / 10.0; + i += 3; + } + else { + temp = (fileContent.get(JAVA_BYTE, i) - '0') * 10 + (b - '0') + + (fileContent.get(JAVA_BYTE, i + 3) - '0') / 10.0; + i += 4; + } + val = (negative ? -temp : temp); + } + i++; // Consume \n + map.add(fileContent, nameStart, nameLen, val); + } + // System.out.println(map.debug_reprobeMax); + var measurements = new TreeMap(); + map.fill(fileContent, measurements); + System.out.println(measurements); + } + } + + private static class PrivateHashMap { + private static final int SIZE_SHIFT = 14; + public static final int SIZE = 1 << SIZE_SHIFT; + public static int MASK = 0xFFFFFFFF >>> (32 - SIZE_SHIFT); + + public static long SHIFT_POS = 16; + public static long MASK_POS = 0xFFFFFFFFFFFF0000L; + public static long MASK_LEN = 0x000000000000FFFFL; + // Encoding: + // - Key: long + // - 48 bits index, 16 bits length + // - min: double + // - max: double + // - sum: double + // - double: double + final long[] keyValues = new long[SIZE * 5]; + + // int debug_size = 0; + + // int debug_reprobeMax = 0; + public PrivateHashMap() { + + } + + public void add(MemorySegment file, long pos, int len, double val) { + int hashCode = 1; + int i = 0; + int intBound = (len / 4) * 4; + for (; i < intBound; i += 4) { + int v = file.get(INT_UNALIGNED_BIG_ENDIAN, pos + i); + hashCode = 31 * hashCode + v; + } + for (; i < len; i++) { + int v = file.get(JAVA_BYTE, pos + i); + hashCode = 31 * hashCode + v; + } + + doAdd(file, hashCode, pos, len, val); + } + + private void doAdd(MemorySegment file, int hash, long pos, int len, double val) { + // var debug = new String(file.asSlice(pos, len).toArray(ValueLayout.JAVA_BYTE)); + int slot = hash & MASK; + for (var probe = 0; probe < 20000; probe++) { + var iSl = ((slot + probe) & MASK) * 5; + var slotEntry = keyValues[iSl]; + + var emtpy = slotEntry == 0; + if (emtpy) { + long keyInfo = pos << SHIFT_POS | len; + long valueBits = doubleToRawLongBits(val); + keyValues[iSl] = keyInfo; + keyValues[iSl + 1] = valueBits; + keyValues[iSl + 2] = valueBits; + keyValues[iSl + 3] = valueBits; + keyValues[iSl + 4] = 1; + // debug_size++; + return; + } + else if (isSameEntry(file, slotEntry, pos, len)) { + keyValues[iSl + 1] = doubleToRawLongBits(Math.min(longBitsToDouble(keyValues[iSl + 1]), val)); + keyValues[iSl + 2] = doubleToRawLongBits(Math.max(longBitsToDouble(keyValues[iSl + 2]), val)); + keyValues[iSl + 3] = doubleToRawLongBits(longBitsToDouble(keyValues[iSl + 3]) + val); + keyValues[iSl + 4] = keyValues[iSl + 4] + 1; + return; + } + else { + // long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; + // int keyLen = (int) (slotEntry & MASK_LEN); + // System.out.println("Colliding " + new String(file.asSlice(pos,len).toArray(ValueLayout.JAVA_BYTE)) + + // " with key" + new String(file.asSlice(keyPos,keyLen).toArray(ValueLayout.JAVA_BYTE)) + + // " hash " + hash + " slot " + slot + "+" + probe + " at " + iSl); + // debug_reprobeMax = Math.max(debug_reprobeMax, probe); + } + } + throw new IllegalStateException("More than 20000 reprobes"); + // throw new IllegalStateException("More than 100 reprobes: At " + debug_size + ""); + } + + private boolean isSameEntry(MemorySegment file, long slotEntry, long pos, int len) { + long keyPos = (slotEntry & MASK_POS) >> SHIFT_POS; + int keyLen = (int) (slotEntry & MASK_LEN); + var isSame = isSame(file, keyPos, pos, len); + // System.out.println("Entry:" + new String(file.asSlice(pos, len).toArray(JAVA_BYTE)) + + // ",Keys:" + new String(file.asSlice(keyPos, keyLen).toArray(JAVA_BYTE)) + + // ",match " + isSame); + return isSame; + } + + private static boolean isSame(MemorySegment file, long i1, long i2, int len) { + int i = 0; + var i1len = i1 + vecLen; + var i2len = i2 + vecLen; + if (len < vecLen && i1len <= file.byteSize() && i2len <= file.byteSize()) { + var v1 = byteVec.fromMemorySegment(file, i1, ByteOrder.BIG_ENDIAN); + var v2 = byteVec.fromMemorySegment(file, i2, ByteOrder.BIG_ENDIAN); + var isTrue = v1.compare(VectorOperators.EQ, v2, allTrue.indexInRange(0, len)); + return isTrue.trueCount() == len; + } + while (8 < (len - i)) { + var v1 = file.get(JAVA_LONG_UNALIGNED, i1 + i); + var v2 = file.get(JAVA_LONG_UNALIGNED, i2 + i); + if (v1 != v2) { + return false; + } + i += 8; + } + while (i < len) { + var v1 = file.get(JAVA_BYTE, i1 + i); + var v2 = file.get(JAVA_BYTE, i2 + i); + + if (v1 != v2) { + return false; + } + i++; + } + return true; + } + + public void fill(MemorySegment file, TreeMap treeMap) { + for (int i = 0; i < keyValues.length / 5; i++) { + var ji = i * 5; + long keyE = keyValues[ji]; + if (keyE != 0) { + long keyPos = (keyE & MASK_POS) >> SHIFT_POS; + int keyLen = (int) (keyE & MASK_LEN); + byte[] keyBytes = new byte[keyLen]; + MemorySegment.copy(file, JAVA_BYTE, keyPos, keyBytes, 0, keyLen); + var key = new String(keyBytes); + var min = longBitsToDouble(keyValues[ji + 1]); + var max = longBitsToDouble(keyValues[ji + 2]); + var sum = longBitsToDouble(keyValues[ji + 3]); + var count = keyValues[ji + 4]; + treeMap.put(key, new ResultRow(min, sum / count, max)); + } + } + } + } + + private static record ResultRow(double min, double mean, double max) { + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + ; +}