From 7816e32b7bdbb991c003bb6573ddc3c029723afa Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Sun, 14 Jan 2024 09:20:32 +0100 Subject: [PATCH] #49 Fixing rounding behavior of baseline implementation --- README.md | 1 + ...late_average_baseline_original_rounding.sh | 19 ++++ .../onebrc/CalculateAverage_baseline.java | 3 +- ...ateAverage_baseline_original_rounding.java | 100 ++++++++++++++++++ .../samples/measurements-rounding.out | 2 +- .../samples/measurements-rounding.txt | 4 + 6 files changed, 127 insertions(+), 2 deletions(-) create mode 100755 calculate_average_baseline_original_rounding.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_baseline_original_rounding.java diff --git a/README.md b/README.md index 16b1c03..b9dada5 100644 --- a/README.md +++ b/README.md @@ -222,6 +222,7 @@ If you want to use a build not available via these channels, reach out to discus * There is a maximum of 10,000 unique station names * Line endings in the file are `\n` characters on all platforms * Implementations must not rely on specifics of a given data set, e.g. any valid station name as per the constraints above and any data distribution (number of measurements per station) must be supported +* The rounding of output values must be done using the semantics of IEEE 754 rounding-direction "roundTowardPositive" ## Entering the Challenge diff --git a/calculate_average_baseline_original_rounding.sh b/calculate_average_baseline_original_rounding.sh new file mode 100755 index 0000000..871559e --- /dev/null +++ b/calculate_average_baseline_original_rounding.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_baseline_original_rounding diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java b/src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java index 0d7c586..83b684e 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_baseline.java @@ -35,6 +35,7 @@ public class CalculateAverage_baseline { } private static record ResultRow(double min, double mean, double max) { + public String toString() { return round(min) + "/" + round(mean) + "/" + round(max); } @@ -79,7 +80,7 @@ public class CalculateAverage_baseline { return res; }, agg -> { - return new ResultRow(agg.min, agg.sum / agg.count, agg.max); + return new ResultRow(agg.min, (Math.round(agg.sum * 10.0) / 10.0) / agg.count, agg.max); }); Map measurements = new TreeMap<>(Files.lines(Paths.get(FILE)) diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_baseline_original_rounding.java b/src/main/java/dev/morling/onebrc/CalculateAverage_baseline_original_rounding.java new file mode 100644 index 0000000..9e5e3ad --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_baseline_original_rounding.java @@ -0,0 +1,100 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import static java.util.stream.Collectors.*; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Map; +import java.util.TreeMap; +import java.util.stream.Collector; + +/** + * This is the original version of the baseline implementation. It contains a + * rounding bug, which can cause calculated mean values to be off by 0.1. See + * {@link CalculateAverage_baseline} for the correct behavior. This version here + * is only kept for reference, in particular for determining whether an + * implementation is valid with the old behavior. Any new or updated entries to + * the challenge must conform to the correct behavior as implemented by + * {@code CalculateAverage_baseline}. 
+ */
+public class CalculateAverage_baseline_original_rounding {
+
+    private static final String FILE = "./measurements.txt";
+
+    /** One input line, split on ';' into the station name and its reading. */
+    private static record Measurement(String station, double value) {
+        private Measurement(String[] parts) {
+            this(parts[0], Double.parseDouble(parts[1]));
+        }
+    }
+
+    /** Per-station result; values are rounded to one decimal only when printed. */
+    private static record ResultRow(double min, double mean, double max) {
+        @Override
+        public String toString() {
+            return round(min) + "/" + round(mean) + "/" + round(max);
+        }
+
+        private double round(double value) {
+            return Math.round(value * 10.0) / 10.0;
+        }
+    }
+
+    /** Mutable running min/max/sum/count for a single station. */
+    private static class MeasurementAggregator {
+        private double min = Double.POSITIVE_INFINITY;
+        private double max = Double.NEGATIVE_INFINITY;
+        private double sum;
+        private long count;
+    }
+
+    /**
+     * Reads {@link #FILE}, aggregates min/mean/max per station and prints the sorted result.
+     * @throws IOException if the measurements file cannot be read
+     */
+    public static void main(String[] args) throws IOException {
+        Collector<Measurement, MeasurementAggregator, ResultRow> collector = Collector.of(
+                MeasurementAggregator::new,
+                (a, m) -> {
+                    a.min = Math.min(a.min, m.value);
+                    a.max = Math.max(a.max, m.value);
+                    a.sum += m.value;
+                    a.count++;
+                },
+                (agg1, agg2) -> {
+                    var res = new MeasurementAggregator();
+                    res.min = Math.min(agg1.min, agg2.min);
+                    res.max = Math.max(agg1.max, agg2.max);
+                    res.sum = agg1.sum + agg2.sum;
+                    res.count = agg1.count + agg2.count;
+                    return res;
+                },
+                // Kept as-is on purpose: the sum is divided without being rounded first.
+                agg -> new ResultRow(agg.min, agg.sum / agg.count, agg.max));
+
+        Map<String, ResultRow> measurements;
+        // Files.lines keeps the file open until the stream is closed.
+        try (var lines = Files.lines(Paths.get(FILE))) {
+            measurements = new TreeMap<>(lines
+                    .map(l -> new Measurement(l.split(";")))
+                    .collect(groupingBy(Measurement::station, collector)));
+        }
+        System.out.println(measurements);
+    }
+}
diff --git a/src/test/resources/samples/measurements-rounding.out 
b/src/test/resources/samples/measurements-rounding.out index f8ae466..4116f3b 100644 --- a/src/test/resources/samples/measurements-rounding.out +++ b/src/test/resources/samples/measurements-rounding.out @@ -1 +1 @@ -{jel=-9.0/17.9/46.5} +{ham=14.6/25.5/33.6, jel=-9.0/18.0/46.5} diff --git a/src/test/resources/samples/measurements-rounding.txt b/src/test/resources/samples/measurements-rounding.txt index 6b74ad0..844af4a 100644 --- a/src/test/resources/samples/measurements-rounding.txt +++ b/src/test/resources/samples/measurements-rounding.txt @@ -1,3 +1,7 @@ +ham;33.6 +ham;31.7 +ham;21.9 +ham;14.6 jel;18.6 jel;12.8 jel;20.7