CalculateAverage_truelive second attempt

* cleanup

* getDouble new double parser

* parseBuffer more reliable

* use graalvm to execute

* cleanup

* cleanup

* fix formatting

* fix graalvm init and launch script
This commit is contained in:
Roman Schweitzer 2024-01-05 23:40:03 +01:00 committed by GitHub
parent 13a385c2e6
commit 5f4ed31fec
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 68 deletions

View File

@ -15,6 +15,8 @@
# limitations under the License. # limitations under the License.
# #
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-graalce 1>&2
JAVA_OPTS="-Xmx4G" JAVA_OPTS="-Xmx8G -Xms2G"
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_truelive time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_truelive

View File

@ -23,6 +23,7 @@ import java.nio.channels.FileChannel;
import java.util.*; import java.util.*;
import java.util.concurrent.atomic.DoubleAccumulator; import java.util.concurrent.atomic.DoubleAccumulator;
import java.util.concurrent.atomic.LongAdder; import java.util.concurrent.atomic.LongAdder;
import java.util.stream.Collector;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import java.util.stream.StreamSupport; import java.util.stream.StreamSupport;
@ -31,31 +32,19 @@ public class CalculateAverage_truelive {
private static final String FILE = "./measurements.txt"; private static final String FILE = "./measurements.txt";
private static final long CHUNK_SIZE = 1024 * 1024 * 10L; private static final long CHUNK_SIZE = 1024 * 1024 * 10L;
private static int branchlessParseInt(final byte[] input, final int length) { private static double getDouble(final byte[] arr, int pos) {
// 0 if positive, 1 if negative final int negative = ~(arr[pos] >> 4) & 1;
final int negative = ~(input[0] >> 4) & 1; int sig = 1;
// 0 if nr length is 3, 1 if length is 4 sig -= 2 * negative;
final int has4 = ((length - negative) >> 2) & 1; pos += negative;
final int digit1 = arr[pos] - '0';
final int digit1 = input[negative] - '0'; pos++;
final int digit2 = input[negative + has4] - '0'; if (arr[pos] == '.') {
final int digit3 = input[2 + negative + has4] - '0'; return sig * (digit1 + (arr[pos + 1] - '0') / 10.0);
return (-negative ^ (has4 * (digit1 * 100) + digit2 * 10 + digit3) - negative);
} }
else {
// branchless max (unprecise for large numbers, but good enough) return sig * (digit1 * 10 + (arr[pos] - '0') + (arr[pos + 2] - '0') / 10.0);
static int max(final int a, final int b) {
final int diff = a - b;
final int dsgn = diff >> 31;
return a - (diff & dsgn);
} }
// branchless min (unprecise for large numbers, but good enough)
static int min(final int a, final int b) {
final int diff = a - b;
final int dsgn = diff >> 31;
return b + (diff & dsgn);
} }
private record Measurement(DoubleAccumulator min, DoubleAccumulator max, DoubleAccumulator sum, LongAdder count) { private record Measurement(DoubleAccumulator min, DoubleAccumulator max, DoubleAccumulator sum, LongAdder count) {
@ -104,16 +93,6 @@ public class CalculateAverage_truelive {
} }
} }
private static Map<String, Measurement> combineMaps(
final Map<String, Measurement> map1,
final Map<String, Measurement> map2) {
for (final var entry : map2.entrySet()) {
map1.merge(entry.getKey(), entry.getValue(), Measurement::combineWith);
}
return map1;
}
public static void main(final String[] args) throws IOException { public static void main(final String[] args) throws IOException {
// long before = System.currentTimeMillis(); // long before = System.currentTimeMillis();
/** /**
@ -152,23 +131,17 @@ public class CalculateAverage_truelive {
} }
} }
}; };
final Map<String, Measurement> reduce = StreamSupport.stream(Spliterators.spliteratorUnknownSize( final Map<String, Measurement> reduce = StreamSupport.stream(Spliterators.spliteratorUnknownSize(
iterator, Spliterator.IMMUTABLE), true) iterator, Spliterator.IMMUTABLE), true)
.parallel()
.map(CalculateAverage_truelive::parseBuffer) .map(CalculateAverage_truelive::parseBuffer)
.reduce(CalculateAverage_truelive::combineMaps).get(); .flatMap(map -> map.entrySet().stream())
.collect(Collectors.toMap(
System.out.print("{"); Map.Entry::getKey,
System.out.print( Map.Entry::getValue,
reduce Measurement::combineWith,
.entrySet() TreeMap::new));
.stream() System.out.println(reduce);
.sorted(Map.Entry.comparingByKey())
.map(Object::toString)
.collect(Collectors.joining(", ")));
System.out.println("}");
// System.out.println("Took: " + (System.currentTimeMillis() - before));
} }
@ -178,28 +151,25 @@ public class CalculateAverage_truelive {
bug.mark(); bug.mark();
String name = null; String name = null;
final byte[] arr = new byte[128]; final byte[] arr = new byte[128];
int cur = 0;
while (bug.hasRemaining()) { while (bug.hasRemaining()) {
final char c = (char) bug.get(); char c = (char) bug.get();
if (c == ';') { arr[cur++] = (byte) c;
final int pos = bug.position(); while (c != ';') {
bug.reset(); c = (char) bug.get();
final int len = pos - bug.position() - 1; arr[cur++] = (byte) c;
bug.get(bug.position(), arr, 0, len);
name = new String(arr, 0, len);
bug.position(pos);
bug.mark();
} }
else if (c == '\n') { name = new String(arr, 0, cur - 1);
final int pos = bug.position(); cur = 0;
bug.reset(); while (c != '\n') {
final int len = pos - bug.position(); c = (char) bug.get();
bug.get(bug.position(), arr, 0, len); arr[cur++] = (byte) c;
final double temp = Double.parseDouble(new String(arr, 0, len)); }
final double temp = getDouble(arr, 0);
resultMap.compute(name, (k, v) -> (v == null) ? Measurement.of(temp) : v.add(temp)); resultMap.compute(name, (k, v) -> (v == null) ? Measurement.of(temp) : v.add(temp));
bug.position(pos); cur = 0;
bug.mark();
}
} }
return resultMap; return resultMap;
} }
} }