diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_filiphr.java b/src/main/java/dev/morling/onebrc/CalculateAverage_filiphr.java index 966da7c..5e1b705 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_filiphr.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_filiphr.java @@ -40,6 +40,7 @@ import java.util.stream.StreamSupport; * Adding memory mapped files: 0m 55s (based on bjhara's submission) * Using big decimal and iterating the buffer once: 0m 20s * Using long parse: 0m 11s + * Using array hash code for city key: 0m 7.1s *

* Using 21.0.1 Temurin with ShenandoahGC on Macbook (Intel) Pro * `sdk use java 21.0.1-tem` @@ -59,11 +60,16 @@ public class CalculateAverage_filiphr { private static final class Measurement { + private final String city; private long min = Long.MAX_VALUE; private long max = Long.MIN_VALUE; private long sum = 0L; private long count = 0L; + private Measurement(String city) { + this.city = city; + } + private void add(long value) { this.min = Math.min(this.min, value); this.max = Math.max(this.max, value); @@ -72,7 +78,7 @@ public class CalculateAverage_filiphr { } public static Measurement combine(Measurement m1, Measurement m2) { - Measurement measurement = new Measurement(); + Measurement measurement = new Measurement(m1.city); measurement.min = Math.min(m1.min, m2.min); measurement.max = Math.max(m1.max, m2.max); measurement.sum = m1.sum + m2.sum; @@ -93,7 +99,7 @@ public class CalculateAverage_filiphr { public static void main(String[] args) throws IOException { // long start = System.nanoTime(); - Map measurements; + Map measurements; try (FileChannel fileChannel = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ)) { measurements = fineChannelStream(fileChannel) .parallel() @@ -101,20 +107,25 @@ public class CalculateAverage_filiphr { .reduce(Collections.emptyMap(), CalculateAverage_filiphr::mergeMaps); } - System.out.println(new TreeMap<>(measurements)); + Map finalMeasurements = new TreeMap<>(); + for (Measurement measurement : measurements.values()) { + finalMeasurements.put(measurement.city, measurement); + } + + System.out.println(finalMeasurements); // System.out.println("Done in " + (System.nanoTime() - start) / 1000000 + " ms"); } - private static Map mergeMaps(Map map1, Map map2) { + private static Map mergeMaps(Map map1, Map map2) { if (map1.isEmpty()) { return map2; } else { - Set cities = new HashSet<>(map1.keySet()); + Set cities = new HashSet<>(map1.keySet()); cities.addAll(map2.keySet()); - Map result = HashMap.newHashMap(cities.size()); 
+ Map result = HashMap.newHashMap(cities.size()); - for (String city : cities) { + for (Integer city : cities) { Measurement m1 = map1.get(city); Measurement m2 = map2.get(city); if (m2 == null) { @@ -137,34 +148,36 @@ public class CalculateAverage_filiphr { } /** - * This is an adapted implementation of the bjhara parseBuffer + * This is an adapted implementation of the bjhara parseBuffer. + * We are using a {@code Map} keyed by an {@code Integer} hash because creating the string key on every single line is unnecessary. + * Instead, we compute a hash key from the city bytes and use that as the key in the map; note that the city name itself is not compared, so two cities with the same hash would share one entry. */ - private static Map parseBuffer(ByteBuffer bb) { - Map measurements = HashMap.newHashMap(415); + private static Map parseBuffer(ByteBuffer bb) { + Map measurements = HashMap.newHashMap(415); int limit = bb.limit(); - byte[] buffer = new byte[128]; + byte[] cityBuffer = new byte[128]; char[] charArray = new char[8]; CharBuffer charBuffer = CharBuffer.wrap(charArray); charBuffer.clear(); charBuffer.position(0); while (bb.position() < limit) { - int bufferIndex = 0; + int cityBufferIndex = 0; // Iterate through the byte buffer and fill the buffer until we find the separator (;) + // While iterating we are also going to compute the city hash key + int cityKey = 1; while (bb.position() < limit) { byte positionByte = bb.get(); if (positionByte == ';') { break; } - buffer[bufferIndex++] = positionByte; + cityBuffer[cityBufferIndex++] = positionByte; + cityKey = 31 * cityKey + positionByte; } - // Create the city - String city = new String(buffer, 0, bufferIndex); - byte lastPositionByte = '\n'; - bufferIndex = 0; + int temperatureBufferIndex = 0; while (bb.position() < limit) { byte positionByte = bb.get(); if (positionByte == '\r' || positionByte == '\n') { @@ -172,15 +185,20 @@ public class CalculateAverage_filiphr { break; } else if (positionByte != '.') { - charArray[bufferIndex++] = (char) positionByte; + charArray[temperatureBufferIndex++] = (char) positionByte; } } - // Create the temperature 
string - long value = Long.parseLong(charBuffer, 0, bufferIndex, 10); + // Create the temperature + long value = Long.parseLong(charBuffer, 0, temperatureBufferIndex, 10); - measurements.computeIfAbsent(city, k -> new Measurement()) - .add(value); + Measurement measurement = measurements.get(cityKey); + if (measurement == null) { + String city = new String(cityBuffer, 0, cityBufferIndex); + measurement = new Measurement(city); + measurements.put(cityKey, measurement); + } + measurement.add(value); // and get rid of the new line (handle both kinds) if (lastPositionByte == '\r') {