1brc/src/main/java/dev/morling/onebrc/CalculateAverage_gnmathur.java
2024-01-10 21:37:16 +01:00

167 lines
6.0 KiB
Java

/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import static java.util.concurrent.Executors.newFixedThreadPool;
/**
* This solution uses an imperative approach. There's essentially two elements to performance tuning attempted in this
* solution:
* 1. Use a thread pool to process the file in chunks, setting it to the number of available processors
* 2. Use a memory mapped file to read the file
*
* On an Intel(R) Core(TM) i9-10920X CPU @ 3.50GHz its taking around 38 seconds to aggregate the measurements.
*/
public class CalculateAverage_gnmathur {
private static final ExecutorService es = newFixedThreadPool(Runtime.getRuntime().availableProcessors());
private static final Map<String, Measurement> result = new ConcurrentHashMap<>();
private static final String FILE_NAME = "measurements.txt";
private static final long CHUNK_SIZE = 1024 * 1024 * 1024; // 1 GB
public static final class Measurement {
private double max = -5000; // impossibly low
private double min = 5000; // impossibly high
private double sum = 0;
private double count = 0;
public Measurement() {
}
public synchronized void addReading(double reading) {
this.min = Math.min(this.min, reading);
this.max = Math.max(this.max, reading);
this.sum += reading;
this.count++;
}
private static double round(double value) {
return Math.round(value * 10.0) / 10.0;
}
@Override
public String toString() {
double mean = sum / count;
return round(min) + "/" + round(mean) + "/" + round(max);
}
}
private static void updateMeasurement(String line) {
String[] parts = line.split(";");
String station = parts[0];
double measurement = Double.parseDouble(parts[1]);
Measurement m = null;
if (result.containsKey(station)) {
m = result.get(station);
}
else {
m = new Measurement();
result.put(station, m);
}
m.addReading(measurement);
}
private record FileChunkProcessor(String fileName, ExecutorService es, long start, long end) implements Runnable {
@Override
public void run() {
// Process a chunk of the file
try (RandomAccessFile fp = new RandomAccessFile(FILE_NAME, "r");
FileChannel fpChannel = fp.getChannel()) {
final MappedByteBuffer mappedByteBuffer = fpChannel.map(FileChannel.MapMode.READ_ONLY, start, end - start);
final ByteBuffer bb = ByteBuffer.allocate(1024);
while (mappedByteBuffer.hasRemaining()) {
byte b = mappedByteBuffer.get();
if (b == '\n') {
// We have read a line. Convert the tempBuffer to a string and process it
bb.flip();
String line = StandardCharsets.UTF_8.decode(bb).toString();
updateMeasurement(line);
bb.clear();
}
else {
bb.put(b);
}
}
// There should be nothing in the byte buffer at this point
if (bb.position() > 0) {
throw new RuntimeException("byte buffer not empty");
}
}
catch (IOException e) {
throw new RuntimeException(e);
}
}
}
// We want to make sure that we don't split the data in the middle of a line. So we find the closest next
// newline character and adjust the end to that.
public static long adjustEnd(final String fileName, long end) throws IOException {
try (RandomAccessFile raf = new RandomAccessFile(fileName, "r")) {
raf.seek(end);
while (true) {
int read = raf.read();
if (read == -1 || read == '\n') {
break;
}
end++;
}
}
return end;
}
// Read the file in chunks and submit each chunk to a thread for processing
public static void readChunked(final String fileName, final long chunkSize) {
try (RandomAccessFile fp = new RandomAccessFile(fileName, "r")) {
long fileSize = fp.length();
long start = 0;
while (start < fileSize) {
long end = Math.min(start + chunkSize, fileSize);
end = adjustEnd(fileName, end);
es.submit(new FileChunkProcessor(fileName, es, start, end));
start = end + 1;
}
es.shutdown();
// Wait for all the threads to finish processing
es.awaitTermination(4, java.util.concurrent.TimeUnit.MINUTES);
}
catch (IOException | InterruptedException e) {
throw new RuntimeException(e);
}
}
public static void main(String[] args) {
readChunked(FILE_NAME, CHUNK_SIZE);
// Print the results
System.out.println(new TreeMap<>(result));
}
}