Adding Nick Palmer's submission;
* Memory mapped file, single-pass parsing, custom hash map, fixed thread pool The threading was a hasty addition and needs work * Used arraylist instead of treemap to reduce a little overhead We only need it sorted for output, so only construct a treemap for output * Attempt to speed up double conversion * Cap core count for low-core systems * Fix wrong exponent * Accumulate measurement value in double, seems marginally faster Benchmark Mode Cnt Score Error Units DoubleParsingBenchmark.ourToDouble thrpt 10 569.771 ± 7.065 ops/us DoubleParsingBenchmark.ourToDoubleAccumulateInToDouble thrpt 10 648.026 ± 7.741 ops/us DoubleParsingBenchmark.ourToDoubleDivideInsteadOfMultiply thrpt 10 570.412 ± 9.329 ops/us DoubleParsingBenchmark.ourToDoubleNegative thrpt 10 512.618 ± 8.580 ops/us DoubleParsingBenchmark.ourToDoubleNegativeAccumulateInToDouble thrpt 10 565.043 ± 18.137 ops/us DoubleParsingBenchmark.ourToDoubleNegativeDivideInsteadOfMultiply thrpt 10 511.228 ± 13.967 ops/us DoubleParsingBenchmark.stringToDouble thrpt 10 52.310 ± 1.351 ops/us DoubleParsingBenchmark.stringToDoubleNegative thrpt 10 50.785 ± 1.252 ops/us
This commit is contained in:
		
							
								
								
									
										20
									
								
								calculate_average_palmr.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										20
									
								
								calculate_average_palmr.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | #!/bin/sh | ||||||
|  | # | ||||||
|  | #  Copyright 2023 The original authors | ||||||
|  | # | ||||||
|  | #  Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  | #  you may not use this file except in compliance with the License. | ||||||
|  | #  You may obtain a copy of the License at | ||||||
|  | # | ||||||
|  | #      http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  | # | ||||||
|  | #  Unless required by applicable law or agreed to in writing, software | ||||||
|  | #  distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  | #  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  | #  See the License for the specific language governing permissions and | ||||||
|  | #  limitations under the License. | ||||||
|  | # | ||||||
|  |  | ||||||
|  |  | ||||||
|  | JAVA_OPTS="--enable-preview" | ||||||
|  | time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_palmr | ||||||
							
								
								
									
										250
									
								
								src/main/java/dev/morling/onebrc/CalculateAverage_palmr.java
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										250
									
								
								src/main/java/dev/morling/onebrc/CalculateAverage_palmr.java
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,250 @@ | |||||||
|  | /* | ||||||
|  |  *  Copyright 2023 The original authors | ||||||
|  |  * | ||||||
|  |  *  Licensed under the Apache License, Version 2.0 (the "License"); | ||||||
|  |  *  you may not use this file except in compliance with the License. | ||||||
|  |  *  You may obtain a copy of the License at | ||||||
|  |  * | ||||||
|  |  *      http://www.apache.org/licenses/LICENSE-2.0 | ||||||
|  |  * | ||||||
|  |  *  Unless required by applicable law or agreed to in writing, software | ||||||
|  |  *  distributed under the License is distributed on an "AS IS" BASIS, | ||||||
|  |  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||||
|  |  *  See the License for the specific language governing permissions and | ||||||
|  |  *  limitations under the License. | ||||||
|  |  */ | ||||||
|  | package dev.morling.onebrc; | ||||||
|  |  | ||||||
|  | import java.io.IOException; | ||||||
|  | import java.io.RandomAccessFile; | ||||||
|  | import java.nio.ByteBuffer; | ||||||
|  | import java.nio.channels.FileChannel; | ||||||
|  | import java.nio.charset.StandardCharsets; | ||||||
|  | import java.util.*; | ||||||
|  |  | ||||||
|  | public class CalculateAverage_palmr { | ||||||
|  |  | ||||||
|  |     private static final String FILE = "./measurements.txt"; | ||||||
|  |     public static final int CHUNK_SIZE = 1024 * 1024 * 10; // Trial and error showed ~10MB to be a good size on our machine | ||||||
|  |     public static final int LITTLE_CHUNK_SIZE = 128; // Enough bytes to cover a station name and measurement value :fingers-crossed: | ||||||
|  |     public static final int STATION_NAME_BUFFER_SIZE = 50; | ||||||
|  |     public static final int THREAD_COUNT = Math.min(8, Runtime.getRuntime().availableProcessors()); | ||||||
|  |  | ||||||
|  |     public static void main(String[] args) throws IOException { | ||||||
|  |  | ||||||
|  |         @SuppressWarnings("resource") // It's faster to leak the file than be well-behaved | ||||||
|  |         RandomAccessFile file = new RandomAccessFile(FILE, "r"); | ||||||
|  |         FileChannel channel = file.getChannel(); | ||||||
|  |         long fileSize = channel.size(); | ||||||
|  |  | ||||||
|  |         long threadChunk = fileSize / THREAD_COUNT; | ||||||
|  |  | ||||||
|  |         Thread[] threads = new Thread[THREAD_COUNT]; | ||||||
|  |         ByteArrayKeyedMap[] results = new ByteArrayKeyedMap[THREAD_COUNT]; | ||||||
|  |         for (int i = 0; i < THREAD_COUNT; i++) { | ||||||
|  |             final int j = i; | ||||||
|  |             long startPoint = j * threadChunk; | ||||||
|  |             long endPoint = startPoint + threadChunk; | ||||||
|  |             Thread thread = new Thread(() -> { | ||||||
|  |                 try { | ||||||
|  |                     results[j] = readAndParse(channel, startPoint, endPoint, fileSize); | ||||||
|  |                 } | ||||||
|  |                 catch (Throwable t) { | ||||||
|  |                     System.err.println("It's broken :("); | ||||||
|  |                     // noinspection CallToPrintStackTrace | ||||||
|  |                     t.printStackTrace(); | ||||||
|  |                 } | ||||||
|  |             }); | ||||||
|  |             threads[i] = thread; | ||||||
|  |             thread.start(); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         final Map<String, MeasurementAggregator> finalAggregator = new TreeMap<>(); | ||||||
|  |  | ||||||
|  |         for (int i = 0; i < THREAD_COUNT; i++) { | ||||||
|  |             try { | ||||||
|  |                 threads[i].join(); | ||||||
|  |             } | ||||||
|  |             catch (InterruptedException e) { | ||||||
|  |                 throw new RuntimeException(e); | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             results[i].getAsUnorderedList().forEach(v -> { | ||||||
|  |                 String stationName = new String(v.stationNameBytes, StandardCharsets.UTF_8); | ||||||
|  |                 finalAggregator.compute(stationName, (_, x) -> { | ||||||
|  |                     if (x == null) { | ||||||
|  |                         return v; | ||||||
|  |                     } | ||||||
|  |                     else { | ||||||
|  |                         x.count += v.count; | ||||||
|  |                         x.min = Math.min(x.min, v.min); | ||||||
|  |                         x.max = Math.max(x.max, v.max); | ||||||
|  |                         x.sum += v.sum; | ||||||
|  |                         return x; | ||||||
|  |                     } | ||||||
|  |                 }); | ||||||
|  |             }); | ||||||
|  |         } | ||||||
|  |         System.out.println(finalAggregator); | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     private static ByteArrayKeyedMap readAndParse(final FileChannel channel, | ||||||
|  |                                                   final long startPoint, | ||||||
|  |                                                   final long endPoint, | ||||||
|  |                                                   final long fileSize) { | ||||||
|  |         final State state = new State(); | ||||||
|  |  | ||||||
|  |         boolean skipFirstEntry = startPoint != 0; | ||||||
|  |  | ||||||
|  |         long offset = startPoint; | ||||||
|  |         while (offset < endPoint) { | ||||||
|  |             parseData(channel, state, offset, Math.min(CHUNK_SIZE, fileSize - offset), false, skipFirstEntry); | ||||||
|  |             skipFirstEntry = false; | ||||||
|  |             offset += CHUNK_SIZE; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         if (offset < fileSize) { | ||||||
|  |             // Make sure we finish reading any partially read entry by going a little in to the next chunk, stopping at the first newline | ||||||
|  |             parseData(channel, state, offset, Math.min(LITTLE_CHUNK_SIZE, fileSize - offset), true, false); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         return state.aggregators; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     private static void parseData(final FileChannel channel, | ||||||
|  |                                   final State state, | ||||||
|  |                                   final long offset, | ||||||
|  |                                   final long bufferSize, | ||||||
|  |                                   final boolean stopAtNewline, | ||||||
|  |                                   final boolean skipFirstEntry) { | ||||||
|  |         ByteBuffer byteBuffer; | ||||||
|  |         try { | ||||||
|  |             byteBuffer = channel.map(FileChannel.MapMode.READ_ONLY, offset, bufferSize); | ||||||
|  |         } | ||||||
|  |         catch (IOException e) { | ||||||
|  |             throw new RuntimeException(e); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         boolean isSkippingToFirstCleanEntry = skipFirstEntry; | ||||||
|  |  | ||||||
|  |         while (byteBuffer.hasRemaining()) { | ||||||
|  |             byte currentChar = byteBuffer.get(); | ||||||
|  |  | ||||||
|  |             if (isSkippingToFirstCleanEntry) { | ||||||
|  |                 if (currentChar == '\n') { | ||||||
|  |                     isSkippingToFirstCleanEntry = false; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 continue; | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             if (currentChar == ';') { | ||||||
|  |                 state.parsingValue = true; | ||||||
|  |             } | ||||||
|  |             else if (currentChar == '\n') { | ||||||
|  |                 if (state.stationPointerEnd != 0) { | ||||||
|  |                     double value = state.measurementValue * state.exponent; | ||||||
|  |  | ||||||
|  |                     MeasurementAggregator aggregator = state.aggregators.computeIfAbsent(state.stationBuffer, state.stationPointerEnd, state.signedHashCode); | ||||||
|  |                     aggregator.count++; | ||||||
|  |                     aggregator.min = Math.min(aggregator.min, value); | ||||||
|  |                     aggregator.max = Math.max(aggregator.max, value); | ||||||
|  |                     aggregator.sum += value; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 if (stopAtNewline) { | ||||||
|  |                     return; | ||||||
|  |                 } | ||||||
|  |  | ||||||
|  |                 // reset | ||||||
|  |                 state.reset(); | ||||||
|  |             } | ||||||
|  |             else { | ||||||
|  |                 if (!state.parsingValue) { | ||||||
|  |                     state.stationBuffer[state.stationPointerEnd++] = currentChar; | ||||||
|  |                     state.signedHashCode = 31 * state.signedHashCode + (currentChar & 0xff); | ||||||
|  |                 } | ||||||
|  |                 else { | ||||||
|  |                     if (currentChar == '-') { | ||||||
|  |                         state.exponent = -0.1; | ||||||
|  |                     } | ||||||
|  |                     else if (currentChar != '.') { | ||||||
|  |                         state.measurementValue = state.measurementValue * 10 + (currentChar - '0'); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     static final class State { | ||||||
|  |         ByteArrayKeyedMap aggregators = new ByteArrayKeyedMap(); | ||||||
|  |         boolean parsingValue = false; | ||||||
|  |         byte[] stationBuffer = new byte[STATION_NAME_BUFFER_SIZE]; | ||||||
|  |         int signedHashCode = 0; | ||||||
|  |         int stationPointerEnd = 0; | ||||||
|  |         double measurementValue = 0; | ||||||
|  |         double exponent = 0.1; | ||||||
|  |  | ||||||
|  |         public void reset() { | ||||||
|  |             parsingValue = false; | ||||||
|  |             signedHashCode = 0; | ||||||
|  |             stationPointerEnd = 0; | ||||||
|  |             measurementValue = 0; | ||||||
|  |             exponent = 0.1; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     private static class MeasurementAggregator { | ||||||
|  |         final byte[] stationNameBytes; | ||||||
|  |         final int stationNameHashCode; | ||||||
|  |         private double min = Double.POSITIVE_INFINITY; | ||||||
|  |         private double max = Double.NEGATIVE_INFINITY; | ||||||
|  |         private double sum; | ||||||
|  |         private long count; | ||||||
|  |  | ||||||
|  |         public MeasurementAggregator(final byte[] stationNameBytes, final int stationNameHashCode) { | ||||||
|  |             this.stationNameBytes = stationNameBytes; | ||||||
|  |             this.stationNameHashCode = stationNameHashCode; | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         public String toString() { | ||||||
|  |             return round(min) + "/" + round(sum / count) + "/" + round(max); | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         private double round(double value) { | ||||||
|  |             return Math.round(value * 10.0) / 10.0; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     private static class ByteArrayKeyedMap { | ||||||
|  |         private final int BUCKET_COUNT = 0xFFF; // 413 unique stations in the data set, & 0xFFF ~= 399 (only 14 collisions (given our hashcode implementation)) | ||||||
|  |         private final MeasurementAggregator[] buckets = new MeasurementAggregator[BUCKET_COUNT + 1]; | ||||||
|  |         private final List<MeasurementAggregator> compactUnorderedBuckets = new ArrayList<>(413); | ||||||
|  |  | ||||||
|  |         public MeasurementAggregator computeIfAbsent(final byte[] key, final int keyLength, final int keyHashCode) { | ||||||
|  |             int index = keyHashCode & BUCKET_COUNT; | ||||||
|  |  | ||||||
|  |             while (true) { | ||||||
|  |                 MeasurementAggregator maybe = buckets[index]; | ||||||
|  |                 if (maybe == null) { | ||||||
|  |                     final byte[] copiedKey = Arrays.copyOf(key, keyLength); | ||||||
|  |                     MeasurementAggregator measurementAggregator = new MeasurementAggregator(copiedKey, keyHashCode); | ||||||
|  |                     buckets[index] = measurementAggregator; | ||||||
|  |                     compactUnorderedBuckets.add(measurementAggregator); | ||||||
|  |                     return measurementAggregator; | ||||||
|  |                 } | ||||||
|  |                 else { | ||||||
|  |                     if (Arrays.equals(key, 0, keyLength, maybe.stationNameBytes, 0, maybe.stationNameBytes.length)) { | ||||||
|  |                         return maybe; | ||||||
|  |                     } | ||||||
|  |                     index++; | ||||||
|  |                     index &= BUCKET_COUNT; | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         public List<MeasurementAggregator> getAsUnorderedList() { | ||||||
|  |             return compactUnorderedBuckets; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | } | ||||||
		Reference in New Issue
	
	Block a user