From 2155286d7a02ee3b92596d8905607277a0bcfbe7 Mon Sep 17 00:00:00 2001 From: Roy van Rijn Date: Mon, 1 Jan 2024 18:33:40 +0100 Subject: [PATCH] Initial implementation, using BufferedReader, parallel processing, combining everything in a single go, sorting afterwards (unoptimized) --- calculate_average.sh | 2 +- calculate_average_royvanrijn.sh | 20 +++++ .../dev/morling/onebrc/CalculateAverage.java | 16 ++-- .../onebrc/CalculateAverage_royvanrijn.java | 79 +++++++++++++++++++ 4 files changed, 108 insertions(+), 9 deletions(-) create mode 100755 calculate_average_royvanrijn.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java diff --git a/calculate_average.sh b/calculate_average.sh index 93cf5f2..5d36397 100755 --- a/calculate_average.sh +++ b/calculate_average.sh @@ -17,4 +17,4 @@ JAVA_OPTS="" -java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage diff --git a/calculate_average_royvanrijn.sh b/calculate_average_royvanrijn.sh new file mode 100755 index 0000000..ae22a3e --- /dev/null +++ b/calculate_average_royvanrijn.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_royvanrijn diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage.java b/src/main/java/dev/morling/onebrc/CalculateAverage.java index c1b8258..b2f4e59 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage.java @@ -52,14 +52,14 @@ public class CalculateAverage { } public static void main(String[] args) throws IOException { -// Map measurements1 = Files.lines(Paths.get(FILE)) -// .map(l -> l.split(";")) -// .collect(groupingBy(m -> m[0], averagingDouble(m -> Double.parseDouble(m[1])))); -// -// measurements1 = new TreeMap<>(measurements1.entrySet() -// .stream() -// .collect(toMap(e -> e.getKey(), e -> Math.round(e.getValue() * 10.0) / 10.0))); -// System.out.println(measurements1); + // Map measurements1 = Files.lines(Paths.get(FILE)) + // .map(l -> l.split(";")) + // .collect(groupingBy(m -> m[0], averagingDouble(m -> Double.parseDouble(m[1])))); + // + // measurements1 = new TreeMap<>(measurements1.entrySet() + // .stream() + // .collect(toMap(e -> e.getKey(), e -> Math.round(e.getValue() * 10.0) / 10.0))); + // System.out.println(measurements1); Collector collector = Collector.of( MeasurementAggregator::new, diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java b/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java new file mode 100644 index 0000000..e9d1e19 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java @@ -0,0 +1,79 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package dev.morling.onebrc;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.AbstractMap;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Reads {@code ./measurements.txt}, where every line has the form {@code station;value},
+ * and prints the minimum, mean and maximum value per station, sorted by station name.
+ */
+public class CalculateAverage_royvanrijn {
+
+    private static final String FILE = "./measurements.txt";
+
+    /** Running aggregate (minimum, maximum, sum and number of values) for one station. */
+    private record Measurement(double min, double max, double sum, long count) {
+
+        /** Creates the aggregate that represents a single reading. */
+        Measurement(double initialMeasurement) {
+            this(initialMeasurement, initialMeasurement, initialMeasurement, 1);
+        }
+
+        /** Merges two aggregates into a new one; neither argument is modified. */
+        public static Measurement combineWith(Measurement m1, Measurement m2) {
+            return new Measurement(
+                    m1.min < m2.min ? m1.min : m2.min,
+                    m1.max > m2.max ? m1.max : m2.max,
+                    m1.sum + m2.sum,
+                    m1.count + m2.count);
+        }
+
+        /** Formats the aggregate as {@code min/mean/max}, each rounded to one decimal. */
+        @Override
+        public String toString() {
+            return round(min) + "/" + round(sum / count) + "/" + round(max);
+        }
+
+        /** Rounds to one decimal place using {@link Math#round(double)}. */
+        private static double round(double value) {
+            return Math.round(value * 10.0) / 10.0;
+        }
+    }
+
+    /**
+     * Aggregates every line of the measurements file in parallel and prints the result as
+     * {@code {station=min/mean/max, ...}}, ordered by station name.
+     *
+     * @param args ignored
+     * @throws IOException if the measurements file cannot be opened
+     */
+    public static void main(String[] args) throws IOException {
+        final Map<String, Measurement> resultMap;
+
+        // Files.lines keeps the file open until the stream is closed, so scope it explicitly.
+        try (var lines = Files.lines(Path.of(FILE))) {
+            resultMap = lines.parallel()
+                    .map(line -> {
+                        // Split "station;value" at the separator.
+                        int pivot = line.indexOf(';');
+                        String key = line.substring(0, pivot);
+                        double measured = Double.parseDouble(line.substring(pivot + 1));
+                        return new AbstractMap.SimpleEntry<>(key, measured);
+                    })
+                    .collect(Collectors.toConcurrentMap(
+                            // Readings of the same station are folded pairwise.
+                            AbstractMap.SimpleEntry::getKey,
+                            entry -> new Measurement(entry.getValue()),
+                            Measurement::combineWith));
+        }
+
+        // Entries render as "key=value"; sort by station name and wrap the list in braces.
+        System.out.println(resultMap.entrySet().stream()
+                .sorted(Map.Entry.comparingByKey())
+                .map(Object::toString)
+                .collect(Collectors.joining(", ", "{", "}")));
+    }
+}