diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java b/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java
index a7baf9b..4c0351a 100644
--- a/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java
+++ b/src/main/java/dev/morling/onebrc/CalculateAverage_C5H12O5.java
@@ -15,136 +15,386 @@
*/
package dev.morling.onebrc;
+import sun.misc.Unsafe;
+
import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.lang.reflect.Field;
import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.channels.AsynchronousFileChannel;
import java.nio.channels.CompletionHandler;
import java.nio.charset.StandardCharsets;
-import java.nio.file.Paths;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
-import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;
-import java.util.concurrent.LinkedBlockingQueue;
+import java.util.concurrent.LinkedTransferQueue;
+import java.util.concurrent.TransferQueue;
/**
- * Calculates the average using AIO and multiple threads.
+ * Results on Mac mini (Apple M2 with 8-core CPU / 8GB unified memory):
+ *
+ * using AIO and multiple threads:
+ * 120.15s user 4.33s system 710% cpu 17.522 total
+ *
+ * reduce the number of memory copies:
+ * 45.87s user 2.82s system 530% cpu 9.185 total
+ *
+ * processing byte array backwards and using bitwise operation to find specific byte (inspired by thomaswue):
+ * 25.38s user 3.44s system 342% cpu 8.406 total
+ *
*
* @author Xylitol
*/
+@SuppressWarnings("unchecked")
public class CalculateAverage_C5H12O5 {
- private static final int BUFFER_CAPACITY = 1024 * 1024 * 10;
- private static final int MAP_CAPACITY = 10000;
- private static final int PROCESSORS = Runtime.getRuntime().availableProcessors();
- private static final BlockingQueue BYTES_QUEUE = new LinkedBlockingQueue<>(PROCESSORS);
- private static long readPosition;
+ private static final int AVAILABLE_PROCESSOR_NUM = Runtime.getRuntime().availableProcessors(); // processor count reported by the JVM when the class is initialized
+ private static final int TRANSFER_QUEUE_CAPACITY = 1024 / 16 / AVAILABLE_PROCESSOR_NUM; // 1GB memory max; presumably 1024MB / 16MB buffers, shared across processors. Integer division: evaluates to 0 when more than 64 processors are reported
+ private static final int BYTE_BUFFER_CAPACITY = 1024 * 1024 * 16; // 16MB one time (value is expressed in bytes)
+ private static final int EXPECTED_MAPPINGS_NUM = 10000; // NOTE(review): presumably a sizing hint for the result maps; usage is outside this view - confirm
+
+    /**
+     * Fragments the file into newline-aligned chunks, at most one per available processor.
+     *
+     * <p>Each tentative boundary is placed {@code size / AVAILABLE_PROCESSOR_NUM} bytes after the
+     * previous one and then moved forward to the byte just after the next {@code '\n'}, so that
+     * no line is split across two chunks.
+     *
+     * @param path the file to fragment
+     * @return the exclusive end offset of every chunk in ascending order; the last element is
+     *         always the file size
+     * @throws IOException if the file size cannot be determined or the file cannot be opened or read
+     */
+    private static long[] fragment(Path path) throws IOException {
+        long size = Files.size(path);
+        long chunk = size / AVAILABLE_PROCESSOR_NUM;
+        List<Long> positions = new ArrayList<>();
+        try (RandomAccessFile file = new RandomAccessFile(path.toFile(), "r")) {
+            long position = chunk;
+            for (int i = 0; i < AVAILABLE_PROCESSOR_NUM - 1; i++) {
+                if (position >= size) {
+                    break;
+                }
+                file.seek(position);
+                // move the position to the next newline byte, stopping at end of file
+                int current;
+                while ((current = file.read()) != -1 && current != '\n') {
+                    position++;
+                }
+                if (current == -1) {
+                    // no newline before end of file (e.g. missing trailing newline):
+                    // the remaining bytes belong to the final chunk added below
+                    break;
+                }
+                // the boundary is the byte right after the newline
+                positions.add(++position);
+                position += chunk;
+            }
+        }
+        // make sure the final chunk always ends at the end of the file
+        if (positions.isEmpty() || positions.getLast() < size) {
+            positions.add(size);
+        }
+        return positions.stream().mapToLong(Long::longValue).toArray();
+    }
public static void main(String[] args) throws Exception {
- System.out.println(calc("./measurements.txt"));
+ // fragment the input file
+ Path path = Path.of("./measurements.txt");
+ long[] positions = fragment(path);
+
+ // start the calculation tasks
+ FutureTask