From 1a4ac0d2496e9329534370eaf01b28e77658b073 Mon Sep 17 00:00:00 2001
From: Quan Anh Mai <49088128+merykitty@users.noreply.github.com>
Date: Wed, 31 Jan 2024 16:47:48 +0800
Subject: [PATCH] Final submission (#669)

* more efficient max, min

* optimize pipeline

* apply parallel to both submissions

* fix bug
---
 .../onebrc/CalculateAverage_merykitty.java    | 83 +++++++++++++-----
 .../CalculateAverage_merykittyunsafe.java     | 87 ++++++++++++++-----
 2 files changed, 126 insertions(+), 44 deletions(-)

diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java b/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java
index 1f5acf3..502002f 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java
@@ -75,8 +75,12 @@ public class CalculateAverage_merykitty {
         }
 
         void observe(Aggregator node, long value) {
-            node.min = Math.min(node.min, value);
-            node.max = Math.max(node.max, value);
+            if (node.min > value) {
+                node.min = value;
+            }
+            if (node.max < value) {
+                node.max = value;
+            }
             node.sum += value;
             node.count++;
         }
@@ -109,7 +113,7 @@ public class CalculateAverage_merykitty {
             var node = new Aggregator();
             node.keySize = size;
             this.nodes[bucket] = node;
-            MemorySegment.copy(data, offset, MemorySegment.ofArray(this.keyData), (long) bucket * KEY_SIZE, size);
+            MemorySegment.copy(data, offset, MemorySegment.ofArray(this.keyData), (long) bucket * KEY_SIZE, size + 1);
             return node;
         }
 
@@ -222,11 +226,12 @@ public class CalculateAverage_merykitty {
         var line = ByteVector.fromMemorySegment(BYTE_SPECIES, data, offset, ByteOrder.nativeOrder());
 
         // Find the delimiter ';'
-        int keySize = line.compare(VectorOperators.EQ, ';').firstTrue();
+        long semicolons = line.compare(VectorOperators.EQ, ';').toLong();
 
         // If we cannot find the delimiter in the vector, that means the key is
         // longer than the vector, fall back to scalar processing
-        if (keySize == BYTE_SPECIES.vectorByteSize()) {
+        if (semicolons == 0) {
+            int keySize = BYTE_SPECIES.length();
             while (data.get(ValueLayout.JAVA_BYTE, offset + keySize) != ';') {
                 keySize++;
             }
@@ -235,6 +240,7 @@ public class CalculateAverage_merykitty {
         }
 
         // We inline the searching of the value in the hash map
+        int keySize = Long.numberOfTrailingZeros(semicolons);
         int x;
         int y;
         if (keySize >= Integer.BYTES) {
@@ -260,7 +266,7 @@ public class CalculateAverage_merykitty {
 
             var nodeKey = ByteVector.fromArray(BYTE_SPECIES, aggrMap.keyData, bucket * PoorManMap.KEY_SIZE);
             long eqMask = line.compare(VectorOperators.EQ, nodeKey).toLong();
-            long validMask = -1L >>> -keySize;
+            long validMask = semicolons ^ (semicolons - 1);
             if ((eqMask & validMask) == validMask) {
                 break;
             }
@@ -269,28 +275,63 @@ public class CalculateAverage_merykitty {
         return parseDataPoint(aggrMap, node, data, offset + keySize + 1);
     }
 
+    private static long findOffset(MemorySegment data, long offset, long limit) {
+        if (offset == 0) {
+            return offset;
+        }
+
+        offset--;
+        while (offset < limit) {
+            if (data.get(ValueLayout.JAVA_BYTE, offset++) == '\n') {
+                break;
+            }
+        }
+        return offset;
+    }
+
     // Process all lines that start in [offset, limit)
     private static PoorManMap processFile(MemorySegment data, long offset, long limit) {
         var aggrMap = new PoorManMap();
-        // Find the start of a new line
-        if (offset != 0) {
-            offset--;
-            while (offset < limit) {
-                if (data.get(ValueLayout.JAVA_BYTE, offset++) == '\n') {
-                    break;
-                }
-            }
-        }
-
-        // If there is no line starting in this segment, just return
         if (offset == limit) {
             return aggrMap;
         }
+        int batches = 2;
+        long batchSize = Math.ceilDiv(limit - offset, batches);
+        long offset0 = offset;
+        long offset1 = offset + batchSize;
+        long limit0 = Math.min(offset1, limit);
+        long limit1 = limit;
 
-        // The main loop, optimized for speed
-        while (offset < limit - Math.max(BYTE_SPECIES.vectorByteSize(),
-                Long.BYTES + 1 + KEY_MAX_SIZE)) {
-            offset = iterate(aggrMap, data, offset);
+        // Find the start of a new line
+        offset0 = findOffset(data, offset0, limit0);
+        offset1 = findOffset(data, offset1, limit1);
+
+        long mainLoopMinWidth = Math.max(BYTE_SPECIES.vectorByteSize(), KEY_MAX_SIZE + 1 + Long.BYTES);
+        if (limit1 - offset1 < mainLoopMinWidth) {
+            offset = findOffset(data, offset, limit);
+            while (offset < limit - mainLoopMinWidth) {
+                offset = iterate(aggrMap, data, offset);
+            }
+        }
+        else {
+            while (true) {
+                boolean finish = false;
+                if (offset0 < limit0) {
+                    offset0 = iterate(aggrMap, data, offset0);
+                }
+                else {
+                    finish = true;
+                }
+                if (offset1 < limit1 - mainLoopMinWidth) {
+                    offset1 = iterate(aggrMap, data, offset1);
+                }
+                else {
+                    if (finish) {
+                        break;
+                    }
+                }
+            }
+            offset = offset1;
         }
 
         // Now we are at the tail, just be simple
diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java b/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java
index 4983694..4b1fc0d 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_merykittyunsafe.java
@@ -91,10 +91,12 @@ public class CalculateAverage_merykittyunsafe {
 
         void observe(long entryOffset, long value) {
             long baseOffset = Unsafe.ARRAY_BYTE_BASE_OFFSET + entryOffset;
-            UNSAFE.putShort(this.data, baseOffset + MIN_OFFSET,
-                    (short) Math.min(value, UNSAFE.getShort(this.data, baseOffset + MIN_OFFSET)));
-            UNSAFE.putShort(this.data, baseOffset + MAX_OFFSET,
-                    (short) Math.max(value, UNSAFE.getShort(this.data, baseOffset + MAX_OFFSET)));
+            if (UNSAFE.getShort(this.data, baseOffset + MIN_OFFSET) > value) {
+                UNSAFE.putShort(this.data, baseOffset + MIN_OFFSET, (short) value);
+            }
+            if (UNSAFE.getShort(this.data, baseOffset + MAX_OFFSET) < value) {
+                UNSAFE.putShort(this.data, baseOffset + MAX_OFFSET, (short) value);
+            }
             UNSAFE.putLong(this.data, baseOffset + SUM_OFFSET,
                     value + UNSAFE.getLong(this.data, baseOffset + SUM_OFFSET));
             UNSAFE.putLong(this.data, baseOffset + COUNT_OFFSET,
@@ -307,31 +309,70 @@ public class CalculateAverage_merykittyunsafe {
         return parseDataPoint(aggrMap, entryOffset, address + keySize + 1);
     }
 
+    private static long findOffset(long base, long offset, long limit) {
+        if (offset == 0) {
+            return offset;
+        }
+
+        offset--;
+        while (offset < limit) {
+            if (UNSAFE.getByte(base + (offset++)) == '\n') {
+                break;
+            }
+        }
+        return offset;
+    }
+
     // Process all lines that start in [offset, limit)
     private static PoorManMap processFile(MemorySegment data, long offset, long limit) {
         var aggrMap = new PoorManMap();
-        long base = data.address();
-        long begin = base + offset;
-        long end = base + limit;
-        // Find the start of a new line
-        if (offset != 0) {
-            begin--;
-            while (begin < end) {
-                if (UNSAFE.getByte(begin++) == '\n') {
-                    break;
-                }
-            }
-        }
-
-        // If there is no line starting in this segment, just return
-        if (begin == end) {
+        if (offset == limit) {
            return aggrMap;
         }
+        long base = data.address();
+        int batches = 2;
+        long batchSize = Math.ceilDiv(limit - offset, batches);
+        long offset0 = offset;
+        long offset1 = offset + batchSize;
+        long limit0 = Math.min(offset1, limit);
+        long limit1 = limit;
 
-        // The main loop, optimized for speed
-        while (begin < end - Math.max(BYTE_SPECIES.vectorByteSize(),
-                Long.BYTES + 1 + KEY_MAX_SIZE)) {
-            begin = iterate(aggrMap, begin);
+        // Find the start of a new line
+        offset0 = findOffset(base, offset0, limit0);
+        offset1 = findOffset(base, offset1, limit1);
+
+        long begin;
+        long end = base + limit;
+        long mainLoopMinWidth = Math.max(BYTE_SPECIES.vectorByteSize(), KEY_MAX_SIZE + 1 + Long.BYTES);
+        if (limit1 - offset1 < mainLoopMinWidth) {
+            begin = base + findOffset(base, offset, limit);
+            while (begin < end - mainLoopMinWidth) {
+                begin = iterate(aggrMap, begin);
+            }
+        }
+        else {
+            long begin0 = base + offset0;
+            long begin1 = base + offset1;
+            long end0 = base + limit0;
+            long end1 = base + limit1;
+            while (true) {
+                boolean finish = false;
+                if (begin0 < end0) {
+                    begin0 = iterate(aggrMap, begin0);
+                }
+                else {
+                    finish = true;
+                }
+                if (begin1 < end1 - mainLoopMinWidth) {
+                    begin1 = iterate(aggrMap, begin1);
+                }
+                else {
+                    if (finish) {
+                        break;
+                    }
+                }
+            }
+            begin = begin1;
         }
 
         // Now we are at the tail, just be simple
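
Note (illustrative appendix, not part of the patch): the "optimize pipeline" change above splits each chunk of the input into two batches and interleaves iterate() calls on both inside a single loop, so the CPU sees two independent dependency chains whose work can overlap. The sketch below is a minimal standalone illustration of that interleaving pattern, applied to counting newline-terminated records in a byte[]; the names (InterleavedScan, findLineStart, nextLine, countLines) are hypothetical and not taken from the submission.

import java.nio.charset.StandardCharsets;

public class InterleavedScan {

    // Move to the first line start at or after 'offset' (mirrors the role of
    // findOffset() in the patch): back up one byte, then scan past the next '\n'.
    static int findLineStart(byte[] data, int offset, int limit) {
        if (offset == 0) {
            return 0;
        }
        offset--;
        while (offset < limit && data[offset++] != '\n') {
            // the partial line skipped here is finished by the previous lane
        }
        return offset;
    }

    // Consume one line and return the offset of the next one; stands in for
    // the per-line work that iterate() does in the patch.
    static int nextLine(byte[] data, int offset) {
        while (data[offset] != '\n') {
            offset++;
        }
        return offset + 1;
    }

    // Count the lines that start in [offset, limit) by splitting the range in
    // half and alternating between the two halves on every loop iteration.
    static long countLines(byte[] data, int offset, int limit) {
        if (offset == limit) {
            return 0;
        }
        long count = 0;
        int mid = offset + Math.ceilDiv(limit - offset, 2);
        int offset0 = findLineStart(data, offset, mid);
        int limit0 = mid;
        int offset1 = findLineStart(data, mid, limit);
        int limit1 = limit;

        while (true) {
            boolean lane0Done = true;
            if (offset0 < limit0) {
                offset0 = nextLine(data, offset0);
                count++;
                lane0Done = false;
            }
            if (offset1 < limit1) {
                offset1 = nextLine(data, offset1);
                count++;
            }
            else if (lane0Done) {
                break;
            }
        }
        return count;
    }

    public static void main(String[] args) {
        byte[] data = "foo;1\nbar;2\nbaz;3\nqux;4\nquux;5\n".getBytes(StandardCharsets.UTF_8);
        System.out.println(countLines(data, 0, data.length)); // prints 5
    }
}

As in the patch, a lane only starts lines inside its own half but may finish a line that crosses the boundary, while the other lane's findLineStart skips that partial line, so every record is handled exactly once.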