Improving first iteration by avoiding string creation as much as possible (#516)
- It avoids creating unnecessary Strings objects and handles with the station names with its djb2 hashes instead - Initializes hashmaps with capacity and load factor - Adds -XX:+AlwaysPreTouch
This commit is contained in:
		| @@ -16,6 +16,6 @@ | ||||
| # | ||||
|  | ||||
|  | ||||
| JAVA_OPTS="-XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC" | ||||
| JAVA_OPTS="-XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch" | ||||
| java --enable-preview  -classpath target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_adriacabeza | ||||
|  | ||||
|   | ||||
| @@ -52,3 +52,4 @@ gnmathur;Gaurav Mathur | ||||
| vemana;Subrahmanyam | ||||
| jincongho;Jin Cong Ho | ||||
| yonatang;Yonatan Graber | ||||
| adriacabeza;Adrià Cabeza | ||||
|   | ||||
| @@ -23,9 +23,13 @@ import java.nio.file.Path; | ||||
| import java.nio.file.Paths; | ||||
| import java.nio.file.StandardOpenOption; | ||||
| import java.util.ArrayList; | ||||
| import java.util.Comparator; | ||||
| import java.util.HashMap; | ||||
| import java.util.HashSet; | ||||
| import java.util.List; | ||||
| import java.util.Map; | ||||
| import java.util.Set; | ||||
| import java.util.concurrent.ConcurrentHashMap; | ||||
| import java.util.stream.Collectors; | ||||
|  | ||||
| /** | ||||
| @@ -35,11 +39,22 @@ public class CalculateAverage_adriacabeza { | ||||
|  | ||||
|     private static final Path FILE_PATH = Paths.get("./measurements.txt"); | ||||
|     public static final int CITY_NAME_MAX_CHARACTERS = 128; | ||||
|     private static final int N_PROCESSORS = Runtime.getRuntime().availableProcessors(); | ||||
|     private static final int DJB2_INIT = 5381; | ||||
|     private static final Map<Integer, String> cityMap = new ConcurrentHashMap<>(10_000, 1, N_PROCESSORS); | ||||
|  | ||||
|     /** | ||||
|      * Represents result containing a HashMap with city as key and ResultRow as value. | ||||
|      */ | ||||
|     private static class Result { | ||||
|         public void addStation(int hash, int value) { | ||||
|             resultMap.put(hash, new StationData(value)); | ||||
|         } | ||||
|  | ||||
|         public StationData getData(int hash) { | ||||
|             return resultMap.get(hash); | ||||
|         } | ||||
|  | ||||
|         private static class StationData { | ||||
|             private int min, sum, count, max; | ||||
|  | ||||
| @@ -63,28 +78,16 @@ public class CalculateAverage_adriacabeza { | ||||
|  | ||||
|         } | ||||
|  | ||||
|         private final Map<String, StationData> resultMap; | ||||
|         private final Map<Integer, StationData> resultMap; | ||||
|  | ||||
|         public Result() { | ||||
|             this.resultMap = new HashMap<>(); | ||||
|             this.resultMap = new HashMap<>(10_000, 1); | ||||
|         } | ||||
|  | ||||
|         public Map<String, StationData> getResultMap() { | ||||
|         public Map<Integer, StationData> getResultMap() { | ||||
|             return resultMap; | ||||
|         } | ||||
|  | ||||
|         public void addMeasurement(String city, int value) { | ||||
|             resultMap.compute(city, (_, resultRow) -> { | ||||
|                 if (resultRow == null) { | ||||
|                     return new StationData(value); | ||||
|                 } | ||||
|                 else { | ||||
|                     resultRow.update(value); | ||||
|                     return resultRow; | ||||
|                 } | ||||
|             }); | ||||
|         } | ||||
|  | ||||
|         public void merge(Result other) { | ||||
|             other.getResultMap().forEach((city, resultRow) -> resultMap.merge(city, resultRow, (existing, incoming) -> { | ||||
|                 existing.min = Math.min(existing.min, incoming.min); | ||||
| @@ -96,9 +99,9 @@ public class CalculateAverage_adriacabeza { | ||||
|         } | ||||
|  | ||||
|         public String toString() { | ||||
|             return this.resultMap.entrySet().stream() | ||||
|                     .sorted(Map.Entry.comparingByKey()) | ||||
|                     .map(entry -> "%s=%s".formatted(entry.getKey(), entry.getValue())) | ||||
|             return this.resultMap.entrySet().parallelStream() | ||||
|                     .map(entry -> "%s=%s".formatted(cityMap.get(entry.getKey()), entry.getValue())) | ||||
|                     .sorted(Comparator.comparing(s -> s.split("=")[0])) | ||||
|                     .collect(Collectors.joining(", ", "{", "}")); | ||||
|         } | ||||
|     } | ||||
| @@ -155,6 +158,21 @@ public class CalculateAverage_adriacabeza { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     private static int readNumberFromBuffer(ByteBuffer buffer, int limit) { | ||||
|         var number = 0; | ||||
|         var sign = 1; | ||||
|         while (buffer.position() < limit) { | ||||
|             var numberByte = buffer.get(); | ||||
|             if (numberByte == '-') | ||||
|                 sign = -1; | ||||
|             else if (numberByte == '\n') | ||||
|                 break; | ||||
|             else if (numberByte != '.') | ||||
|                 number = number * 10 + (numberByte - '0'); | ||||
|         } | ||||
|         return sign * number; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Calculates average measurements from the file. | ||||
|      * | ||||
| @@ -167,28 +185,31 @@ public class CalculateAverage_adriacabeza { | ||||
|                     Result partialResult = new Result(); | ||||
|                     var limit = buffer.limit(); | ||||
|                     var field = new byte[CITY_NAME_MAX_CHARACTERS]; | ||||
|                     Set<Integer> seenHashes = new HashSet<>(10_000, 1); | ||||
|                     while (buffer.position() < limit) { | ||||
|                         var fieldCurrentIndex = 0; | ||||
|                         field[fieldCurrentIndex++] = buffer.get(); | ||||
|                         while (buffer.position() < limit) { | ||||
|                         var fieldByte = buffer.get(); | ||||
|                         field[fieldCurrentIndex++] = fieldByte; | ||||
|                         // implement djb2 hash: https://theartincode.stanis.me/008-djb2/ | ||||
|                         int hash = DJB2_INIT; | ||||
|                         while (buffer.position() < limit) { | ||||
|                             // hash = hash * 33 + fieldByte | ||||
|                             hash = (((hash << 5) + hash) + fieldByte); | ||||
|                             fieldByte = buffer.get(); | ||||
|                             if (fieldByte == ';') | ||||
|                                 break; | ||||
|                             field[fieldCurrentIndex++] = fieldByte; | ||||
|                         } | ||||
|                         var fieldStr = new String(field, 0, fieldCurrentIndex); | ||||
|                         var number = 0; | ||||
|                         var sign = 1; | ||||
|                         while (buffer.position() < limit) { | ||||
|                             var numberByte = buffer.get(); | ||||
|                             if (numberByte == '-') | ||||
|                                 sign = -1; | ||||
|                             else if (numberByte == '\n') | ||||
|                                 break; | ||||
|                             else if (numberByte != '.') | ||||
|                                 number = number * 10 + (numberByte - '0'); | ||||
|  | ||||
|                         var number = readNumberFromBuffer(buffer, limit); | ||||
|                         if (!seenHashes.contains(hash)) { | ||||
|                             seenHashes.add(hash); | ||||
|                             cityMap.put(hash, new String(field, 0, fieldCurrentIndex)); | ||||
|                             partialResult.addStation(hash, number); | ||||
|                         } | ||||
|                         else { | ||||
|                             partialResult.getData(hash).update(number); | ||||
|                         } | ||||
|                         partialResult.addMeasurement(fieldStr, sign * number); | ||||
|                     } | ||||
|                     return partialResult; | ||||
|                 }).reduce(new Result(), (partialResult1, partialResult2) -> { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user