submission for kidlike (#294)

* first version

* second version (0m59s)

* third version (0m46s)

* fourth version (0m39s)

* fifth version (0m18s)

* follow naming conventions from project structure

* fix rounding (see /issues/49)

* formatting changes from build

* name should case-match github username

* sixth version (14s)

* seventh version (11s)

* potential fix for other systems?

* no need for sdk install

* binary should go to ./target

* building native-image only when not existing yet
This commit is contained in:
Kidlike 2024-01-13 22:22:36 +01:00 committed by GitHub
parent 32143b2a4c
commit c01668403a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 278 additions and 0 deletions

24
calculate_average_Kidlike.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if [ -f ./target/image_calculateaverage_Kidlike ]; then
./target/image_calculateaverage_Kidlike
else
# -XX:+UseEpsilonGC
JAVA_OPTS="--enable-preview -Xms18g -Xmx18g -XX:+UnlockExperimentalVMOptions"
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_Kidlike
fi

24
prepare_Kidlike.sh Executable file
View File

@ -0,0 +1,24 @@
#!/bin/bash
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
if [ ! -e target/image_calculateaverage_Kidlike ]; then
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-graal
#NATIVE_IMAGE_OPTS="--gc=epsilon -O3 -march=native --enable-preview"
NATIVE_IMAGE_OPTS="-O3 -march=native --enable-preview"
native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/image_calculateaverage_Kidlike dev.morling.onebrc.CalculateAverage_Kidlike
fi

View File

@ -0,0 +1,230 @@
/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel.MapMode;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
/**
* Versions (correlate to git commits):
* <ol>
* <li>2m34s: parallel file read -> load byte chunks in memory -> sequentially process bytes for result</li>
* <li>0m59s: process byte chunks in parallel (had to introduce smarter byte chunking so it splits only on newlines)</li>
* <li>0m46s: smaller numeric types for MeasurementAggregator</li>
* <li>0m39s: implement custom byte[] to int parsing, instead of Double.parseDouble(new String(bytes))</li>
* <li>0m18s: run with GraalVM native-image</li>
* <li>0m14s: remove ConcurrentHashMap</li>
* <li>0m11s: remove Map#compute</li>
* </ol>
*
* <p>
* Hardware:
* <ul>
* <li>CPU: https://www.cpubenchmark.net/cpu.php?cpu=AMD+Ryzen+7+5800H&id=3907</li>
* <li>NVMe: https://www.harddrivebenchmark.net/hdd.php?hdd=SKHynix%20HFS001TDE9X084N&id=28560</li>
* </ul>
* </p>
*/
public class CalculateAverage_Kidlike {
public static void main(String[] args) {
File file = new File("./measurements.txt");
long fileSize = file.length();
int processors = (fileSize < 1_000_000) ? 1 : Runtime.getRuntime().availableProcessors();
long chunkSize = fileSize / processors;
MappedByteBuffer[] byteBuffers = new MappedByteBuffer[processors];
try (var executor = Executors.newVirtualThreadPerTaskExecutor()) {
for (int i = 0; i < processors; i++) {
long start = i * chunkSize;
long length = (i == processors - 1) ? fileSize - chunkSize * (processors - 1) : chunkSize;
int processor = i;
executor.execute(() -> {
long realStart = start;
try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
if (start != 0) {
raf.seek(start);
while (raf.readByte() != '\n') {
// move pointer to newline, so that we can process the byte chunks in parallel later.
}
realStart = raf.getFilePointer();
}
long realLength = fileSize - realStart;
if (realStart + length < fileSize) {
raf.seek(realStart + length);
while (raf.readByte() != '\n') {
// move pointer to newline, so that we can process the byte chunks in parallel later.
}
realLength = raf.getFilePointer() - realStart;
}
MappedByteBuffer byteBuffer = raf.getChannel().map(MapMode.READ_ONLY, realStart, realLength);
byteBuffer.load();
byteBuffers[processor] = byteBuffer;
}
catch (IOException e) {
throw new RuntimeException(e);
}
});
}
}
System.out.println(new TreeMap(calculateMeasurements(byteBuffers)));
}
private static Map<String, MeasurementAggregator> calculateMeasurements(MappedByteBuffer[] buffers) {
return Arrays.stream(buffers).parallel()
.map(buffer -> {
var results = new HashMap<String, MeasurementAggregator>();
var state = State.NEXT_READ_CITY;
var citySink = new CheapByteBuffer(100);
var measurementSink = new CheapByteBuffer(10);
while (buffer.hasRemaining()) {
byte b = buffer.get();
char c = (char) b;
if (c == ';') {
state = State.NEXT_READ_MEASUREMENT;
continue;
}
if (c == '\n') {
String city = new String(citySink.getBytes(), UTF_8);
int measurement = bytesToInt(measurementSink.getBytes());
var entry = results.get(city);
if (entry == null) {
entry = new MeasurementAggregator();
results.put(city, entry);
}
entry.count++;
entry.sum += measurement;
entry.min = (short) Math.min(entry.min, measurement);
entry.max = (short) Math.max(entry.max, measurement);
citySink.clear();
measurementSink.clear();
state = State.NEXT_READ_CITY;
continue;
}
switch (state) {
case NEXT_READ_CITY -> citySink.append(b);
case NEXT_READ_MEASUREMENT -> measurementSink.append(b);
}
}
return results;
})
.flatMap(m -> m.entrySet().stream())
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, MeasurementAggregator::merge));
}
/**
* Removes decimal points and returns an integer. For example -12.3 would return -123
*/
static int bytesToInt(byte[] bytes) {
short index = (short) (bytes.length - 1);
boolean isNegative = (bytes[0] == '-');
int number = (bytes[index] - '0');
index -= 2;
number += (10 * (bytes[index--] - '0'));
if (index == 1 || (!isNegative && index == 0)) {
number += (100 * (bytes[index] - '0'));
}
if (isNegative) {
return -number;
}
else {
return number;
}
}
private enum State {
NEXT_READ_CITY,
NEXT_READ_MEASUREMENT
}
/**
* Numbers are stored as integers, because of {@link #bytesToInt}, and then divided by 10.0 to restore their decimal point.
*/
private static class MeasurementAggregator {
private static final DecimalFormat rounder = new DecimalFormat("0.0");
short min = Short.MAX_VALUE;
short max = Short.MIN_VALUE;
long sum;
int count;
private MeasurementAggregator merge(MeasurementAggregator other) {
min = (short) Math.min(min, other.min);
max = (short) Math.max(max, other.max);
sum += other.sum;
count += other.count;
return this;
}
@Override
public String toString() {
return rounder.format(min / 10.0)
+ "/" + rounder.format(Math.round((double) sum / count) / 10.0)
+ "/" + rounder.format(max / 10.0);
}
}
private static class CheapByteBuffer {
private final byte[] data;
private int length;
public CheapByteBuffer(final int startSize) {
this.data = new byte[startSize];
this.length = 0;
}
public void append(final byte b) {
data[length++] = b;
}
public void clear() {
this.length = 0;
}
public byte[] getBytes() {
return Arrays.copyOf(this.data, this.length);
}
}
}