diff --git a/calculate_average_agoncal.sh b/calculate_average_agoncal.sh new file mode 100755 index 0000000..9a295fc --- /dev/null +++ b/calculate_average_agoncal.sh @@ -0,0 +1,21 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# sdk use java 21.0.1-tem + +JAVA_OPTS="--enable-preview -XX:+UseShenandoahGC -XX:+UseStringDeduplication -da" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_agoncal \ No newline at end of file diff --git a/prepare_agoncal.sh b/prepare_agoncal.sh new file mode 100755 index 0000000..d2a3c6b --- /dev/null +++ b/prepare_agoncal.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-tem 1>&2 diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java b/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java new file mode 100644 index 0000000..fe6a0a6 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_agoncal.java @@ -0,0 +1,153 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ConcurrentHashMap; + +/** + * This is the solution from GitHut Copilot Chat with the help of Antonio Goncalves (prompting and guiding, but trying not to change code directly on my own, always using Copilot). + *

+ * List of prompts that has been used: + *

+ * ============= + * ============= + * ============= + * v1 - 73603 ms + * You are entering The One Billion Row Challenge (1BRC) which is an exploration of how far modern Java can be pushed for aggregating one billion rows from a text file. Grab all the (virtual) threads, reach out to SIMD, optimize the GC, or pull any other trick, and create the fastest implementation for solving this task! + * The text file contains temperature values for a range of weather stations. Each row is one measurement in the format ;, with the measurement value having exactly one fractional digit. The following delimited with --- shows ten rows as an example: + * --- + * Hamburg;12.0 + * Bulawayo;8.9 + * Palembang;38.8 + * St. John's;15.2 + * Cracow;12.6 + * Bridgetown;26.9 + * Istanbul;6.2 + * Roseau;34.4 + * Conakry;31.2 + * Istanbul;23.0 + * --- + * You have to write a Java program which reads the file, calculates the min, mean, and max temperature value per weather station, and emits the results on stdout like the result below delimited by --- (i.e. sorted alphabetically by station name, and the result values per station in the format //, rounded to one fractional digit). Notice the curly braces: + * --- + * {Abha=-23.0/18.0/59.2, Abidjan=-16.2/26.0/67.3, Abéché=-10.0/29.4/69.0, Accra=-10.1/26.4/66.4, Addis Ababa=-23.7/16.0/67.0, Adelaide=-27.8/17.3/58.5, ...} + * --- + * You must use Java 21. + * Create an algorithm in any way you see fit including parallelizing the computation, using the (incubating) Vector API, memory-mapping different sections of the file concurrently, using AppCDS, GraalVM, CRaC, etc. for speeding up the application start-up, choosing and tuning the garbage collector, and much more. + * No external library dependencies may be used. + * ============= + * ============= + * ============= + * (Here I had to chat with Copilot about formatting the output, there were commas missing, the curly brackets were also missed) + * ============= + * ============= + * ============= + * v2 - 71831 ms + * Being written in Java 21, please use records instead of classes for Measurement. + * ============= + * ============= + * ============= + * v3 - 69333 ms + * If the temperatures are small numbers, why use double? Can't you use another datatype ? + *

+ * The profiler mentions that this line of code has very bad performance. Can you refactor it so it has better performance: + * --- + * String[] parts = line.split(";") + * --- + *

+ * There is a maximum of 10000 unique station names. Can you optimize the code taking this into account? + * ============= + * ============= + * ============= + * v4 - 56417 ms + * Which parameters can I pass to the JVM to make it run faster ? + * Which GC can I use and what is the most optimized to run CalculateAverage ? + */ +public class CalculateAverage_agoncal { + + private static final String FILE = "./measurements.txt"; + + record Measurement(String station, double temperature) { + } + + static class StationStats { + double min; + double max; + double sum; + int count; + + public StationStats(double temperature) { + this.min = temperature; + this.max = temperature; + this.sum = 0; + this.count = 0; + } + + synchronized void update(double temperature) { + min = Math.min(min, temperature); + max = Math.max(max, temperature); + sum += temperature; + count++; + } + + double getAverage() { + return round(sum) / count; + } + + @Override + public String toString() { + return String.format("%.1f/%.1f/%.1f", round(min), round(getAverage()), round(max)); + } + } + + public static void main(String[] args) throws IOException { + Map stats = new ConcurrentHashMap<>(10_000); + try (BufferedReader reader = Files.newBufferedReader(Paths.get(FILE))) { + reader.lines().parallel().forEach(line -> { + int separatorIndex = line.indexOf(';'); + String station = line.substring(0, separatorIndex); + String temperature = line.substring(separatorIndex + 1); + Measurement m = new Measurement(station, Double.parseDouble(temperature)); + stats.computeIfAbsent(m.station, k -> new StationStats(m.temperature)).update(m.temperature); + }); + } + + TreeMap sortedStats = new TreeMap<>(stats); + Iterator> iterator = sortedStats.entrySet().iterator(); + System.out.print("{"); + while (iterator.hasNext()) { + Map.Entry entry = iterator.next(); + StationStats s = entry.getValue(); + if (iterator.hasNext()) { + System.out.printf("%s=%s, ", entry.getKey(), s.toString()); + } + else { + System.out.printf("%s=%s", entry.getKey(), s.toString()); + } + } + System.out.println("}"); + } + + private static double round(double value) { + return Math.round(value * 10.0) / 10.0; + } +}