diff --git a/calculate_average_imrafaelmerino.sh b/calculate_average_imrafaelmerino.sh new file mode 100755 index 0000000..db88d62 --- /dev/null +++ b/calculate_average_imrafaelmerino.sh @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 +java -version +JAVA_OPTS="" +CHUNK_SIZE=$((8 * 1024 * 1024)) +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar \ +dev.morling.onebrc.CalculateAverage_imrafaelmerino $CHUNK_SIZE diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_imrafaelmerino.java b/src/main/java/dev/morling/onebrc/CalculateAverage_imrafaelmerino.java new file mode 100644 index 0000000..8bded77 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_imrafaelmerino.java @@ -0,0 +1,293 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.FileChannel.MapMode; +import java.nio.file.Paths; +import java.nio.file.StandardOpenOption; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.function.Supplier; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * @author Rafael Merino García + * + *
+ * + * Model Name: MacBook Pro + * Model Identifier: MacBookPro17,1 + * Chip: Apple M1 + * Total Number of Cores: 8 (4 performance and 4 efficiency) + * Memory: 16 GB + * + * Executed 10 times on my machine with a chunk size of 20MB + * + * 21.0.1-graal + * avg: 15,366 sg | min: 14,878 sg | max: 15,937 sg | acc: 153,657 sg | times: 10 + * + * 21-oracle + * avg: 17,032 sg | min: 16,448 sg | max: 17,424 sg | acc: 170,325 sg | times: 10 + * + * + * + * Credits: + * . bjhara: Really nice spliterator to be able to use the Stream API. + * . ebarlas: working with integers since we only have to consider one decimal + * (I don't think this makes a big difference though) + * . filiphr: It was my starting point, since it's the most natural way of approaching + * the problem using the nice spliterator from bjhara. This solution has the potential + * for substantial improvement by actively pursuing a higher level of parallelization. + * + * + * + *
+ * Generalization Note: + * + * - This solution is designed to be applicable to any CSV file under the following assumptions: + * + * - The line schema follows the pattern: name;value\n + * + * - The name is up to 128 characters (can be changed to hold any other size and irrelevant for the result) + * + * - The value is a decimal number with only one decimal digit. + * + * - The focus is on maintaining code simplicity without extreme optimization efforts, + * as achieving meaningful conclusions often requires substantial time and dedication, + * particularly with tools like JMH. + * + * - Emphasis on utilizing idiomatic Java and common data structures, following a pragmatic approach. + * + * - Addressing the question of whether the workload is CPU-bound or IO-bound is key; indications suggest + * both aspects are relevant. It's difficult to make the cores sweat! The observed trend in many solutions + * suggests the potential for increased parallelization to fully utilize multiple cores effectively. + * This solution brings to the table the Java interface ForkJoinPool.ManagedBlocker, aiming to enhance parallelism in scenarios + * where threads from the Fork Join Pool are blocked. + * + * - Come on, guys! Stop rolling the dice with fancy optimizations and reinventing hash map structures and + * hash algorithms. This should be hard fun + * and not tedious. Don't get me wrong! Just an opinion :) + * + * - Last but not least, Gunnar Morling, you rock, man! Thanks for your time and effort. + * + * - + * + *+ */ +public class CalculateAverage_imrafaelmerino { + + private static final String FILE = "./measurements.txt"; + private static final int FIELD_SIZE = 128; + + public static void main(String[] args) throws IOException { + var chunkSize = Long.parseLong(args[0].trim()); + var result = calculateStats(FILE, chunkSize); + System.out.println(result); + } + + private static Map