CalculateAverage_truelive second attempt

* cleanup

* getDouble new double parser

* parseBuffer more reliable

* use graalvm to execute

* cleanup

* cleanup

* fix formatting

* fix graalvm init and launch script
This commit is contained in:
Roman Schweitzer 2024-01-05 23:40:03 +01:00 committed by GitHub
parent 13a385c2e6
commit 5f4ed31fec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 68 deletions

View File

@ -15,6 +15,8 @@
# limitations under the License.
#
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-graalce 1>&2
JAVA_OPTS="-Xmx4G"
JAVA_OPTS="-Xmx8G -Xms2G"
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_truelive

View File

@ -23,6 +23,7 @@ import java.nio.channels.FileChannel;
import java.util.*;
import java.util.concurrent.atomic.DoubleAccumulator;
import java.util.concurrent.atomic.LongAdder;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@ -31,31 +32,19 @@ public class CalculateAverage_truelive {
private static final String FILE = "./measurements.txt";
private static final long CHUNK_SIZE = 1024 * 1024 * 10L;
private static int branchlessParseInt(final byte[] input, final int length) {
// 0 if positive, 1 if negative
final int negative = ~(input[0] >> 4) & 1;
// 0 if nr length is 3, 1 if length is 4
final int has4 = ((length - negative) >> 2) & 1;
final int digit1 = input[negative] - '0';
final int digit2 = input[negative + has4] - '0';
final int digit3 = input[2 + negative + has4] - '0';
return (-negative ^ (has4 * (digit1 * 100) + digit2 * 10 + digit3) - negative);
private static double getDouble(final byte[] arr, int pos) {
final int negative = ~(arr[pos] >> 4) & 1;
int sig = 1;
sig -= 2 * negative;
pos += negative;
final int digit1 = arr[pos] - '0';
pos++;
if (arr[pos] == '.') {
return sig * (digit1 + (arr[pos + 1] - '0') / 10.0);
}
// branchless max (unprecise for large numbers, but good enough)
static int max(final int a, final int b) {
final int diff = a - b;
final int dsgn = diff >> 31;
return a - (diff & dsgn);
else {
return sig * (digit1 * 10 + (arr[pos] - '0') + (arr[pos + 2] - '0') / 10.0);
}
// branchless min (unprecise for large numbers, but good enough)
static int min(final int a, final int b) {
final int diff = a - b;
final int dsgn = diff >> 31;
return b + (diff & dsgn);
}
private record Measurement(DoubleAccumulator min, DoubleAccumulator max, DoubleAccumulator sum, LongAdder count) {
@ -104,16 +93,6 @@ public class CalculateAverage_truelive {
}
}
private static Map<String, Measurement> combineMaps(
final Map<String, Measurement> map1,
final Map<String, Measurement> map2) {
for (final var entry : map2.entrySet()) {
map1.merge(entry.getKey(), entry.getValue(), Measurement::combineWith);
}
return map1;
}
public static void main(final String[] args) throws IOException {
// long before = System.currentTimeMillis();
/**
@ -152,23 +131,17 @@ public class CalculateAverage_truelive {
}
}
};
final Map<String, Measurement> reduce = StreamSupport.stream(Spliterators.spliteratorUnknownSize(
iterator, Spliterator.IMMUTABLE), true)
.parallel()
.map(CalculateAverage_truelive::parseBuffer)
.reduce(CalculateAverage_truelive::combineMaps).get();
System.out.print("{");
System.out.print(
reduce
.entrySet()
.stream()
.sorted(Map.Entry.comparingByKey())
.map(Object::toString)
.collect(Collectors.joining(", ")));
System.out.println("}");
// System.out.println("Took: " + (System.currentTimeMillis() - before));
.flatMap(map -> map.entrySet().stream())
.collect(Collectors.toMap(
Map.Entry::getKey,
Map.Entry::getValue,
Measurement::combineWith,
TreeMap::new));
System.out.println(reduce);
}
@ -178,28 +151,25 @@ public class CalculateAverage_truelive {
bug.mark();
String name = null;
final byte[] arr = new byte[128];
int cur = 0;
while (bug.hasRemaining()) {
final char c = (char) bug.get();
if (c == ';') {
final int pos = bug.position();
bug.reset();
final int len = pos - bug.position() - 1;
bug.get(bug.position(), arr, 0, len);
name = new String(arr, 0, len);
bug.position(pos);
bug.mark();
char c = (char) bug.get();
arr[cur++] = (byte) c;
while (c != ';') {
c = (char) bug.get();
arr[cur++] = (byte) c;
}
else if (c == '\n') {
final int pos = bug.position();
bug.reset();
final int len = pos - bug.position();
bug.get(bug.position(), arr, 0, len);
final double temp = Double.parseDouble(new String(arr, 0, len));
name = new String(arr, 0, cur - 1);
cur = 0;
while (c != '\n') {
c = (char) bug.get();
arr[cur++] = (byte) c;
}
final double temp = getDouble(arr, 0);
resultMap.compute(name, (k, v) -> (v == null) ? Measurement.of(temp) : v.add(temp));
bug.position(pos);
bug.mark();
}
cur = 0;
}
return resultMap;
}
}