diff --git a/calculate_average_kevinmcmurtrie.sh b/calculate_average_kevinmcmurtrie.sh new file mode 100755 index 0000000..417bf14 --- /dev/null +++ b/calculate_average_kevinmcmurtrie.sh @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Uncomment below to use sdk +# source "$HOME/.sdkman/bin/sdkman-init.sh" +# sdk use java 21.0.1-graal 1>&2 + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kevinmcmurtrie $1 diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kevinmcmurtrie.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kevinmcmurtrie.java new file mode 100644 index 0000000..367f679 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kevinmcmurtrie.java @@ -0,0 +1,516 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.ByteArrayInputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.Reader; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Spliterator; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.Future; +import java.util.function.Consumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +/** + * Kevin McMurtrie https://github.com/kevinmcmurtrie + *
+ * Code challenge submission for https://github.com/gunnarmorling/1brc
+ */
+public class CalculateAverage_kevinmcmurtrie implements AutoCloseable {
+ private static final String FILE = "./measurements.txt";
+ private static final Charset CHARSET = StandardCharsets.UTF_8;
+ private static final int THREADS = Runtime.getRuntime().availableProcessors() + 2;
+
+ // This is used for push-back when fitting buffers to the last line break
+ private static final int MAX_LINE_LENGTH = 1024;
+
+ private static final int READ_CHUNK_SIZE = 4 * 1024 * 1024;
+ private static final int CHAR_CHUNK_SIZE = Math.max(MAX_LINE_LENGTH, 64 * 1024);
+
+ // Internal array size for hashing cities
+ private static final int HASH_BUCKETS = 1039;
+
+ // Fixed-point number parameters
+ private static final int DIGITS_AFTER_DECIMAL_POINT_INPUT = 1;
+ private static final int DIGITS_AFTER_DECIMAL_POINT_OUTPUT = 1;
+
+ // City and Temperature delimiter
+ private static final char DELIMITER = ';';
+
+ private final LineAlignedInput in; // Must synchronize on this when multiple threads are reading. Use fillFromFile().
+
+ public static class Accumulator {
+ private static final Comparator
+ *
+ */
+ public static class Element {
+ private final char[] line;
+ private final int hashCode;
+ private final Element collision;
+ private long min = 0;
+ private long max = 0;
+ private long sum = 0;
+ private long count = 0;
+
+ public Element(final Element collision, final char[] line, final int hashCode, final long value) {
+ this.collision = collision;
+ this.line = line;
+ this.hashCode = hashCode;
+ min = value;
+ max = value;
+ sum = value;
+ count = 1;
+ }
+
+ public Element(final Element collision, final Element src) {
+ this.collision = collision;
+ this.line = src.line;
+ this.hashCode = src.hashCode;
+ min = src.min;
+ max = src.max;
+ sum = src.sum;
+ count = src.count;
+ }
+
+ void accumulate(final long value) {
+ min = Math.min(value, min);
+ max = Math.max(value, max);
+ sum = Math.addExact(sum, value);
+ count++;
+ }
+
+ private void merge(final Element a) {
+ min = Math.min(a.min, min);
+ max = Math.max(a.max, max);
+ sum = Math.addExact(sum, a.sum);
+ count += a.count;
+ }
+
+ @Override
+ /**
+ * City=min/avg/max
+ */
+ public String toString() {
+ return new String(line) + '=' + fixedToString(min) + "/" + fixedToString((double) sum / count) + "/" + fixedToString(max);
+ }
+ }
+
+ /**
+ * Hasher that operates on a buffer without generating a substring
+ */
+ static private final int hasher(final char[] buf, final int start, final int end) {
+ int hc = buf[start];
+ for (int i = start + 1; i < end; ++i) {
+ hc = hc * 31 + buf[i];
+ }
+ return hc & Integer.MAX_VALUE;
+ }
+
+ /**
+ * Tests if a pre-calculated hash and buffer area match an Element
+ */
+ static boolean matches(final Element o, final char[] str, final int start, final int end, final int hashCode) {
+ return (hashCode == o.hashCode) && Arrays.equals(o.line, 0, o.line.length, str, start, end);
+ }
+
+ static boolean matches(final Element a, final Element b) {
+ return (a.hashCode == b.hashCode) && Arrays.equals(a.line, b.line);
+ }
+
+ /**
+ * Merge another Accumulator into this one
+ *
+ * @param src
+ */
+ void merge(final Accumulator src) {
+ for (final Element srcElementHead : src.buckets) {
+ for (Element srcElem = srcElementHead; srcElem != null; srcElem = srcElem.collision) {
+ final int idx = srcElem.hashCode % buckets.length;
+
+ final Element elementHead = buckets[idx];
+ boolean found = false;
+ for (Element e = elementHead; e != null; e = e.collision) {
+ if (matches(e, srcElem)) {
+ e.merge(srcElem);
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ buckets[idx] = new Element(elementHead, srcElem);
+ }
+ }
+ }
+ }
+
+ /**
+ * Accumulate a weather string
+ *
+ * @param str City;temperature
+ */
+ void accumulate(final char[] buf, final int delimiterPos, final int start, final int end) {
+ final long value = readFixed(buf, delimiterPos + 1, end);
+
+ final int hc = hasher(buf, start, delimiterPos);
+ final int idx = hc % buckets.length;
+ final Element elementHead = buckets[idx];
+
+ for (Element e = elementHead; e != null; e = e.collision) {
+ if (matches(e, buf, start, delimiterPos, hc)) {
+ e.accumulate(value);
+ return;
+ }
+ }
+
+ buckets[idx] = new Element(elementHead, Arrays.copyOfRange(buf, start, delimiterPos), hc, value);
+ }
+
+ /**
+ * @return A stream of Element.toString() values.
+ */
+ public Stream