1brc/src/main/java/dev/morling/onebrc/CalculateAverage_merykitty.java

339 lines
13 KiB
Java
Raw Normal View History

/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.Map;
import java.util.TreeMap;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.VectorOperators;
import jdk.incubator.vector.VectorSpecies;
public class CalculateAverage_merykitty {
private static final String FILE = "./measurements.txt";
2024-01-10 20:24:19 +01:00
private static final VectorSpecies<Byte> BYTE_SPECIES = ByteVector.SPECIES_PREFERRED.length() >= 32
? ByteVector.SPECIES_256
: ByteVector.SPECIES_128;
private static final ValueLayout.OfLong JAVA_LONG_LT = ValueLayout.JAVA_LONG_UNALIGNED.withOrder(ByteOrder.LITTLE_ENDIAN);
private static final long KEY_MAX_SIZE = 100;
2024-01-10 20:24:19 +01:00
private static class Aggregator {
private int keySize;
private long min = Integer.MAX_VALUE;
private long max = Integer.MIN_VALUE;
private long sum;
private long count;
public String toString() {
2024-01-10 20:24:19 +01:00
return round(min / 10.) + "/" + round(sum / (double) (10 * count)) + "/" + round(max / 10.);
}
private double round(double value) {
return Math.round(value * 10.0) / 10.0;
}
}
// An open-address map that is specialized for this task
private static class PoorManMap {
2024-01-10 20:24:19 +01:00
// 100-byte key + 4-byte hash + 4-byte size +
// 2-byte min + 2-byte max + 8-byte sum + 8-byte count
private static final int KEY_SIZE = 128;
// There is an assumption that map size <= 10000;
private static final int CAPACITY = 1 << 17;
private static final int BUCKET_MASK = CAPACITY - 1;
2024-01-10 20:24:19 +01:00
byte[] keyData;
Aggregator[] nodes;
PoorManMap() {
this.keyData = new byte[CAPACITY * KEY_SIZE];
this.nodes = new Aggregator[CAPACITY];
}
2024-01-10 20:24:19 +01:00
void observe(Aggregator node, long value) {
node.min = Math.min(node.min, value);
node.max = Math.max(node.max, value);
node.sum += value;
node.count++;
}
Aggregator indexSimple(MemorySegment data, long offset, int size) {
int x;
int y;
if (size >= Integer.BYTES) {
x = data.get(ValueLayout.JAVA_INT_UNALIGNED, offset);
y = data.get(ValueLayout.JAVA_INT_UNALIGNED, offset + size - Integer.BYTES);
}
else {
x = data.get(ValueLayout.JAVA_BYTE, offset);
y = data.get(ValueLayout.JAVA_BYTE, offset + size - Byte.BYTES);
}
int hash = hash(x, y);
int bucket = hash & BUCKET_MASK;
for (;; bucket = (bucket + 1) & BUCKET_MASK) {
var node = this.nodes[bucket];
if (node == null) {
2024-01-10 20:24:19 +01:00
return insertInto(bucket, data, offset, size);
}
2024-01-10 20:24:19 +01:00
else if (keyEqualScalar(bucket, data, offset, size)) {
return node;
}
}
}
2024-01-10 20:24:19 +01:00
Aggregator insertInto(int bucket, MemorySegment data, long offset, int size) {
var node = new Aggregator();
node.keySize = size;
this.nodes[bucket] = node;
MemorySegment.copy(data, offset, MemorySegment.ofArray(this.keyData), (long) bucket * KEY_SIZE, size);
return node;
}
void mergeInto(Map<String, Aggregator> target) {
for (int i = 0; i < CAPACITY; i++) {
var node = this.nodes[i];
if (node == null) {
continue;
}
2024-01-10 20:24:19 +01:00
String key = new String(this.keyData, i * KEY_SIZE, node.keySize, StandardCharsets.UTF_8);
target.compute(key, (k, v) -> {
if (v == null) {
v = new Aggregator();
}
v.min = Math.min(v.min, node.min);
v.max = Math.max(v.max, node.max);
v.sum += node.sum;
v.count += node.count;
return v;
});
}
}
2024-01-10 20:24:19 +01:00
static int hash(int x, int y) {
int seed = 0x9E3779B9;
int rotate = 5;
return (Integer.rotateLeft(x * seed, rotate) ^ y) * seed; // FxHash
}
2024-01-10 20:24:19 +01:00
private boolean keyEqualScalar(int bucket, MemorySegment data, long offset, int size) {
if (this.nodes[bucket].keySize != size) {
return false;
}
// Be simple
for (int i = 0; i < size; i++) {
2024-01-10 20:24:19 +01:00
int c1 = this.keyData[bucket * KEY_SIZE + i];
int c2 = data.get(ValueLayout.JAVA_BYTE, offset + i);
if (c1 != c2) {
return false;
}
}
return true;
}
}
// Parse a number that may/may not contain a minus sign followed by a decimal with
// 1 - 2 digits to the left and 1 digits to the right of the separator to a
// fix-precision format. It returns the offset of the next line (presumably followed
// the final digit and a '\n')
2024-01-10 20:24:19 +01:00
private static long parseDataPoint(PoorManMap aggrMap, Aggregator node, MemorySegment data, long offset) {
long word = data.get(JAVA_LONG_LT, offset);
// The 4th binary digit of the ascii of a digit is 1 while
// that of the '.' is 0. This finds the decimal separator
// The value can be 12, 20, 28
int decimalSepPos = Long.numberOfTrailingZeros(~word & 0x10101000);
int shift = 28 - decimalSepPos;
// signed is -1 if negative, 0 otherwise
long signed = (~word << 59) >> 63;
long designMask = ~(signed & 0xFF);
// Align the number to a specific position and transform the ascii code
// to actual digit value in each byte
long digits = ((word & designMask) << shift) & 0x0F000F0F00L;
// Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
// 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
// 0x000000UU00TTHH00 +
// 0x00UU00TTHH000000 * 10 +
// 0xUU00TTHH00000000 * 100
// Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400
// This results in our value lies in the bit 32 to 41 of this product
// That was close :)
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
long value = (absValue ^ signed) - signed;
2024-01-10 20:24:19 +01:00
aggrMap.observe(node, value);
return offset + (decimalSepPos >>> 3) + 3;
}
// Tail processing version of the above, do not over-fetch and be simple
2024-01-10 20:24:19 +01:00
private static long parseDataPointSimple(PoorManMap aggrMap, Aggregator node, MemorySegment data, long offset) {
int value = 0;
boolean negative = false;
if (data.get(ValueLayout.JAVA_BYTE, offset) == '-') {
negative = true;
offset++;
}
for (;; offset++) {
int c = data.get(ValueLayout.JAVA_BYTE, offset);
if (c == '.') {
c = data.get(ValueLayout.JAVA_BYTE, offset + 1);
2024-01-10 20:24:19 +01:00
value = value * 10 + (c - '0');
offset += 3;
break;
}
2024-01-10 20:24:19 +01:00
value = value * 10 + (c - '0');
}
2024-01-10 20:24:19 +01:00
value = negative ? -value : value;
aggrMap.observe(node, value);
return offset;
}
2024-01-10 20:24:19 +01:00
// An iteration of the main parse loop, parse a line starting from offset.
// This requires offset to be the start of the line and there is spare space so
// that we have relative freedom in processing
2024-01-10 20:24:19 +01:00
// It returns the offset of the next line that it needs processing
private static long iterate(PoorManMap aggrMap, MemorySegment data, long offset) {
var line = ByteVector.fromMemorySegment(BYTE_SPECIES, data, offset, ByteOrder.nativeOrder());
// Find the delimiter ';'
2024-01-10 20:24:19 +01:00
int keySize = line.compare(VectorOperators.EQ, ';').firstTrue();
2024-01-10 20:24:19 +01:00
// If we cannot find the delimiter in the vector, that means the key is
// longer than the vector, fall back to scalar processing
if (keySize == BYTE_SPECIES.vectorByteSize()) {
while (data.get(ValueLayout.JAVA_BYTE, offset + keySize) != ';') {
keySize++;
}
2024-01-10 20:24:19 +01:00
var node = aggrMap.indexSimple(data, offset, keySize);
return parseDataPoint(aggrMap, node, data, offset + 1 + keySize);
}
2024-01-10 20:24:19 +01:00
// We inline the searching of the value in the hash map
int x;
int y;
if (keySize >= Integer.BYTES) {
x = data.get(ValueLayout.JAVA_INT_UNALIGNED, offset);
y = data.get(ValueLayout.JAVA_INT_UNALIGNED, offset + keySize - Integer.BYTES);
}
else {
x = data.get(ValueLayout.JAVA_BYTE, offset);
y = data.get(ValueLayout.JAVA_BYTE, offset + keySize - Byte.BYTES);
}
int hash = PoorManMap.hash(x, y);
int bucket = hash & PoorManMap.BUCKET_MASK;
Aggregator node;
for (;; bucket = (bucket + 1) & PoorManMap.BUCKET_MASK) {
node = aggrMap.nodes[bucket];
if (node == null) {
node = aggrMap.insertInto(bucket, data, offset, keySize);
break;
}
2024-01-10 20:24:19 +01:00
if (node.keySize != keySize) {
continue;
}
2024-01-10 20:24:19 +01:00
var nodeKey = ByteVector.fromArray(BYTE_SPECIES, aggrMap.keyData, bucket * PoorManMap.KEY_SIZE);
long eqMask = line.compare(VectorOperators.EQ, nodeKey).toLong();
long validMask = -1L >>> -keySize;
if ((eqMask & validMask) == validMask) {
break;
}
}
2024-01-10 20:24:19 +01:00
return parseDataPoint(aggrMap, node, data, offset + keySize + 1);
}
// Process all lines that start in [offset, limit)
private static PoorManMap processFile(MemorySegment data, long offset, long limit) {
2024-01-10 20:24:19 +01:00
var aggrMap = new PoorManMap();
// Find the start of a new line
if (offset != 0) {
offset--;
2024-01-10 20:24:19 +01:00
while (offset < limit) {
if (data.get(ValueLayout.JAVA_BYTE, offset++) == '\n') {
break;
}
}
}
// If there is no line starting in this segment, just return
if (offset == limit) {
return aggrMap;
}
// The main loop, optimized for speed
while (offset < limit - Math.max(BYTE_SPECIES.vectorByteSize(),
Long.BYTES + 1 + KEY_MAX_SIZE)) {
offset = iterate(aggrMap, data, offset);
}
// Now we are at the tail, just be simple
while (offset < limit) {
2024-01-10 20:24:19 +01:00
int keySize = 0;
while (data.get(ValueLayout.JAVA_BYTE, offset + keySize) != ';') {
keySize++;
}
2024-01-10 20:24:19 +01:00
var node = aggrMap.indexSimple(data, offset, keySize);
offset = parseDataPointSimple(aggrMap, node, data, offset + 1 + keySize);
}
return aggrMap;
}
public static void main(String[] args) throws InterruptedException, IOException {
int processorCnt = Runtime.getRuntime().availableProcessors();
2024-01-10 20:24:19 +01:00
var res = new TreeMap<String, Aggregator>();
try (var file = FileChannel.open(Path.of(FILE), StandardOpenOption.READ);
var arena = Arena.ofShared()) {
var data = file.map(MapMode.READ_ONLY, 0, file.size(), arena);
long chunkSize = Math.ceilDiv(data.byteSize(), processorCnt);
var threadList = new Thread[processorCnt];
var resultList = new PoorManMap[processorCnt];
for (int i = 0; i < processorCnt; i++) {
int index = i;
long offset = i * chunkSize;
long limit = Math.min((i + 1) * chunkSize, data.byteSize());
2024-01-10 20:24:19 +01:00
var thread = new Thread(() -> resultList[index] = processFile(data, offset, limit));
threadList[index] = thread;
thread.start();
}
for (var thread : threadList) {
thread.join();
}
// Collect the results
for (var aggrMap : resultList) {
2024-01-10 20:24:19 +01:00
aggrMap.mergeInto(res);
}
}
2024-01-10 20:24:19 +01:00
System.out.println(res);
}
}