diff --git a/calculate_average_seijikun.sh b/calculate_average_seijikun.sh new file mode 100755 index 0000000..fbe68ad --- /dev/null +++ b/calculate_average_seijikun.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_seijikun diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java b/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java new file mode 100644 index 0000000..8038c1a --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_seijikun.java @@ -0,0 +1,221 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.*; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.TreeMap; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +public class CalculateAverage_seijikun { + + private static final String FILE = "./measurements.txt"; + + + private static class MeasurementAggregator { + private double min = Double.POSITIVE_INFINITY; + private double max = Double.NEGATIVE_INFINITY; + private double sum; + private long count; + + public void printInto(PrintStream out) { + out.printf("%.1f/%.1f/%.1f", min, (sum / (double) count), max); + } + } + + public static class StationIdent implements Comparable { + private final int nameLength; + private final String name; + private final int nameHash; + + public StationIdent(byte[] name, int nameHash) { + this.nameLength = name.length; + this.name = new String(name); + this.nameHash = nameHash; + } + + @Override + public int hashCode() { + return nameHash; + } + + @Override + public boolean equals(Object obj) { + var other = (StationIdent) obj; + if (other.nameLength != nameLength) { + return false; + } + return name.equals(other.name); + } + + @Override + public int compareTo(StationIdent o) { + return name.compareTo(o.name); + } + } + + public static class ChunkReader implements Runnable { + RandomAccessFile file; + + // Start offset of this chunk + private final long startOffset; + // end offset of this chunk + private final long endOffset; + + // state + private MappedByteBuffer buffer = null; + private int ptr = 0; + private TreeMap workSet; + + public ChunkReader(RandomAccessFile file, long startOffset, long endOffset) { + this.file = file; + this.startOffset = startOffset; + this.endOffset = endOffset; + } + + private StationIdent readStationName() { + int startPtr = ptr; + int hashCode = 0; + int hashBytePtr = 0; + byte c; + while ((c = buffer.get(ptr++)) != ';') { + hashCode ^= ((int) c) << (hashBytePtr * 8); + hashBytePtr = (hashBytePtr + 1) % 4; + } + byte[] stationNameBfr = new byte[ptr - startPtr - 1]; + buffer.get(startPtr, stationNameBfr); + return new StationIdent(stationNameBfr, hashCode); + } + + private double readTemperature() { + double ret = 0, div = 1; + byte c = buffer.get(ptr++); + boolean neg = (c == '-'); + if (neg) + c = buffer.get(ptr++); + + do { + ret = ret * 10 + c - '0'; + } while ((c = buffer.get(ptr++)) >= '0' && c <= '9'); + + if (c == '.') { + while ((c = buffer.get(ptr++)) != '\n') { + ret += (c - '0') / (div *= 10); + } + } + + if (neg) + return -ret; + return ret; + } + + @Override + public void run() { + workSet = new TreeMap<>(); + int chunkSize = (int) (endOffset - startOffset); + try { + buffer = file.getChannel().map(FileChannel.MapMode.READ_ONLY, startOffset, chunkSize); + + while (ptr < chunkSize) { + var station = readStationName(); + var temp = readTemperature(); + var stationWorkSet = workSet.get(station); + if (stationWorkSet == null) { + stationWorkSet = new MeasurementAggregator(); + workSet.put(station, stationWorkSet); + } + stationWorkSet.min = Math.min(temp, stationWorkSet.min); + stationWorkSet.max = Math.max(temp, stationWorkSet.max); + stationWorkSet.sum += temp; + stationWorkSet.count += 1; + } + } + catch (IOException e) { + e.printStackTrace(); + throw new RuntimeException(e); + } + } + } + + public static void main(String[] args) throws IOException, InterruptedException { + RandomAccessFile file = new RandomAccessFile(FILE, "r"); + + int jobCnt = Runtime.getRuntime().availableProcessors(); + + var chunks = new ChunkReader[jobCnt]; + long chunkSize = file.length() / jobCnt; + long chunkStartPtr = 0; + byte[] tmpBuffer = new byte[128]; + for (int i = 0; i < jobCnt; ++i) { + long chunkEndPtr = chunkStartPtr + chunkSize; + if (i != (jobCnt - 1)) { // align chunks to newlines + file.seek(chunkEndPtr - 1); + file.read(tmpBuffer); + int offset = 0; + while (tmpBuffer[offset] != '\n') { + offset += 1; + } + chunkEndPtr += offset; + } + else { // last chunk ends at file end + chunkEndPtr = file.length(); + } + chunks[i] = new ChunkReader(file, chunkStartPtr, chunkEndPtr); + chunkStartPtr = chunkEndPtr; + } + + try(var executor = Executors.newFixedThreadPool(jobCnt)) { + for (int i = 0; i < jobCnt; ++i) { + executor.submit(chunks[i]); + } + executor.shutdown(); + var ignored = executor.awaitTermination(1, TimeUnit.DAYS); + } + + // merge chunks + var result = chunks[0].workSet; + for (int i = 1; i < jobCnt; ++i) { + chunks[i].workSet.forEach((ident, otherStationWorkSet) -> { + var stationWorkSet = result.get(ident); + if (stationWorkSet == null) { + result.put(ident, otherStationWorkSet); + } + else { + stationWorkSet.min = Math.min(stationWorkSet.min, otherStationWorkSet.min); + stationWorkSet.max = Math.max(stationWorkSet.max, otherStationWorkSet.max); + stationWorkSet.sum += otherStationWorkSet.sum; + stationWorkSet.count += otherStationWorkSet.count; + } + }); + } + + // print in required format + System.out.write('{'); + var iterator = result.entrySet().iterator(); + while (iterator.hasNext()) { + var entry = iterator.next(); + System.out.print(entry.getKey().name); + System.out.write('='); + entry.getValue().printInto(System.out); + if (iterator.hasNext()) { + System.out.print(", "); + } + } + System.out.println('}'); + } +}