package dev.morling.onebrc; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.TreeMap; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ExecutorService; import static java.util.concurrent.Executors.newFixedThreadPool; /** * This solution uses an imperative approach. There's essentially two elements to performance tuning attempted in this * solution: * 1. Use a thread pool to process the file in chunks, setting it to the number of available processors * 2. Use a memory mapped file to read the file * * On an Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz its taking around 38 seconds to aggregate the measurements. */ public class CalculateAverage_gnmathur { private static final ExecutorService es = newFixedThreadPool(Runtime.getRuntime().availableProcessors()); private static final Map result = new ConcurrentHashMap<>(); private static final String FILE_NAME = "measurements.txt"; private static final long CHUNK_SIZE = 1024 * 1024 * 1024; // 1 GB public static final class Measurement { private double max = -5000; // impossibly low private double min = 5000; // impossibly high private double sum = 0; private double count = 0; public Measurement() { } public synchronized void addReading(double reading) { this.min = Math.min(this.min, reading); this.max = Math.max(this.max, reading); this.sum += reading; this.count++; } private static double round(double value) { return Math.round(value * 10.0) / 10.0; } @Override public String toString() { double mean = sum / count; return round(min) + "/" + round(mean) + "/" + round(max); } } private static void updateMeasurement(String line) { String[] parts = line.split(";"); String station = parts[0]; double measurement = Double.parseDouble(parts[1]); Measurement m = null; if (result.containsKey(station)) { m = result.get(station); } else { m = new Measurement(); result.put(station, m); } m.addReading(measurement); } private record FileChunkProcessor(String fileName, ExecutorService es, long start, long end) implements Runnable { @Override public void run() { // Process a chunk of the file try (RandomAccessFile fp = new RandomAccessFile(FILE_NAME, "r"); FileChannel fpChannel = fp.getChannel()) { final MappedByteBuffer mappedByteBuffer = fpChannel.map(FileChannel.MapMode.READ_ONLY, start, end - start); final ByteBuffer bb = ByteBuffer.allocate(1024); while (mappedByteBuffer.hasRemaining()) { byte b = mappedByteBuffer.get(); if (b == '\n') { // We have read a line. Convert the tempBuffer to a string and process it bb.flip(); String line = StandardCharsets.UTF_8.decode(bb).toString(); updateMeasurement(line); bb.clear(); } else { bb.put(b); } } // There should be nothing in the byte buffer at this point if (bb.position() > 0) { throw new RuntimeException("byte buffer not empty"); } } catch (IOException e) { throw new RuntimeException(e); } } } // We want to make sure that we don't split the data in the middle of a line. So we find the closest next // newline character and adjust the end to that. public static long adjustEnd(final String fileName, long end) throws IOException { try (RandomAccessFile raf = new RandomAccessFile(fileName, "r")) { raf.seek(end); while (true) { int read = raf.read(); if (read == -1 || read == '\n') { break; } end++; } } return end; } // Read the file in chunks and submit each chunk to a thread for processing public static void readChunked(final String fileName, final long chunkSize) { try (RandomAccessFile fp = new RandomAccessFile(fileName, "r")) { long fileSize = fp.length(); long start = 0; while (start < fileSize) { long end = Math.min(start + chunkSize, fileSize); end = adjustEnd(fileName, end); es.submit(new FileChunkProcessor(fileName, es, start, end)); start = end + 1; } es.shutdown(); // Wait for all the threads to finish processing es.awaitTermination(4, java.util.concurrent.TimeUnit.MINUTES); } catch (IOException | InterruptedException e) { throw new RuntimeException(e); } } public static void main(String[] args) { readChunked(FILE_NAME, CHUNK_SIZE); // Print the results System.out.println(new TreeMap<>(result)); } }