diff --git a/calculate_average_SamuelYvon.sh b/calculate_average_SamuelYvon.sh new file mode 100755 index 0000000..da03382 --- /dev/null +++ b/calculate_average_SamuelYvon.sh @@ -0,0 +1,19 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +JAVA_OPTS="--enable-preview" +java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_samuelyvon diff --git a/prepare_SamuelYvon.sh b/prepare_SamuelYvon.sh new file mode 100755 index 0000000..f83a3ff --- /dev/null +++ b/prepare_SamuelYvon.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +source "$HOME/.sdkman/bin/sdkman-init.sh" +sdk use java 21.0.1-graal 1>&2 diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_samuelyvon.java b/src/main/java/dev/morling/onebrc/CalculateAverage_samuelyvon.java new file mode 100644 index 0000000..10a2179 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_samuelyvon.java @@ -0,0 +1,249 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.io.RandomAccessFile; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.charset.StandardCharsets; +import java.util.*; +import java.util.stream.Collectors; + +/** + * Samuel Yvon's entry. + *
+ * Explanation behind my reasoning: + * - I want to make it as fast as possible without it being an unreadable mess; I want to avoid bit fiddling UTF-8 + * and use the provided facilities + * - I use the fact that we know the number of stations to optimize HashMap creation (75% rule) + * - I stole branch-less compare from royvanrijn + * - I assume valid ASCII encoding for the number part, which allows me to parse it manually + * (should hold for valid UTF-8) + * - I have not done Java in forever. Especially what the heck it's become. I've looked at the other submissions and + * the given sample to get inspiration. I did not even know about this Stream API thing. + *
+ * + *+ * Future ideas: + * - Could probably vectorize the number parsing with the Vector API (but it's three to four numbers, is it worth it?) + *
+ * + *+ * Observations: + * - [2024-01-09] The branch-less code from royvanrijn does not have a huge impact + *
+ * + *+ * Changelogs: + * 2024-01-09: Naive multi-threaded, no floats, manual line parsing + *
+ */ +public class CalculateAverage_samuelyvon { + + private static final String FILE = "./measurements.txt"; + + private static final int MAX_STATIONS = 10000; + + private static final byte SEMICOL = 0x3B; + + private static final byte MINUS = '-'; + + private static final byte ZERO = '0'; + + private static final byte NEWLINE = '\n'; + + // The minimum line length in bytes (over-egg.) + private static final int MIN_LINE_LENGTH_BYTES = 200; + + /** + * Branchless max (imprecise for large numbers because a - b can overflow, but good enough) + * + * @author royvanrijn + */ + private static int branchlessMax(final int a, final int b) { + final int diff = a - b; + final int dsgn = diff >> 31; + return a - (diff & dsgn); + } + + /** + * Branchless min (imprecise for large numbers because a - b can overflow, but good enough) + * + * @author royvanrijn + */ + private static int branchlessMin(final int a, final int b) { + final int diff = a - b; + final int dsgn = diff >> 31; + return b + (diff & dsgn); + } + + /** + * Per-city running aggregate: min, max, sum and count of measurements, kept as integers in tenths (value * 10). + */ + private static class StationMeasureAgg { + private int min; + private int max; + private long sum; + private long count; + + private final String city; + + public StationMeasureAgg(String city) { + // Actual numbers are between -99.9 and 99.9, but we *10 to avoid float; min/max start at 1000/-1000, outside that scaled range + this.city = city; + min = 1000; + max = -1000; + sum = 0; + count = 0; + } + + public String city() { + return this.city; + } + + /** + * Folds the min, max, sum and count of {@code other} into this aggregate. + * + * @param other aggregate to absorb; it is only read + * @return this instance + */ + public StationMeasureAgg mergeWith(StationMeasureAgg other) { + min = branchlessMin(min, other.min); + max = branchlessMax(max, other.max); + + sum += other.sum; + count += other.count; + + return this; + } + + /** + * Adds one measurement to the running min, max, sum and count. + * + * @param number the measurement, in the same tenths scale that toString divides by 10 + */ + public void accumulate(int number) { + min = branchlessMin(min, number); + max = branchlessMax(max, number); + + sum += number; + count++; + } + + /** + * Renders the aggregate as min/mean/max, each divided by 10.0; the mean (sum / count) is rounded with Math.round before the division. + */ + @Override + public String toString() { + double min = Math.round((double) this.min) / 10.0; + double max = Math.round((double) this.max) / 10.0; + double mean = Math.round((((double) this.sum / this.count))) / 10.0; + return min + "/" + mean + "/" + max; + } + } + + 
private static HashMap