Improving first iteration by avoiding string creation as much as possible (#516)
- Avoids creating unnecessary String objects by identifying station names by their djb2 hashes instead
- Initializes hash maps with an explicit initial capacity and load factor
- Adds -XX:+AlwaysPreTouch
This commit is contained in:
parent
36ffed1315
commit
0a7726cc64
@ -16,6 +16,6 @@
|
|||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
JAVA_OPTS="-XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC"
|
JAVA_OPTS="-XX:+UseStringDeduplication -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC -XX:+AlwaysPreTouch"
|
||||||
java --enable-preview -classpath target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_adriacabeza
|
java --enable-preview -classpath target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_adriacabeza
|
||||||
|
|
||||||
|
@ -52,3 +52,4 @@ gnmathur;Gaurav Mathur
|
|||||||
vemana;Subrahmanyam
|
vemana;Subrahmanyam
|
||||||
jincongho;Jin Cong Ho
|
jincongho;Jin Cong Ho
|
||||||
yonatang;Yonatan Graber
|
yonatang;Yonatan Graber
|
||||||
|
adriacabeza;Adrià Cabeza
|
||||||
|
@ -23,9 +23,13 @@ import java.nio.file.Path;
|
|||||||
import java.nio.file.Paths;
|
import java.nio.file.Paths;
|
||||||
import java.nio.file.StandardOpenOption;
|
import java.nio.file.StandardOpenOption;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Comparator;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -35,11 +39,22 @@ public class CalculateAverage_adriacabeza {
|
|||||||
|
|
||||||
private static final Path FILE_PATH = Paths.get("./measurements.txt");
|
private static final Path FILE_PATH = Paths.get("./measurements.txt");
|
||||||
public static final int CITY_NAME_MAX_CHARACTERS = 128;
|
public static final int CITY_NAME_MAX_CHARACTERS = 128;
|
||||||
|
private static final int N_PROCESSORS = Runtime.getRuntime().availableProcessors();
|
||||||
|
private static final int DJB2_INIT = 5381;
|
||||||
|
private static final Map<Integer, String> cityMap = new ConcurrentHashMap<>(10_000, 1, N_PROCESSORS);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents result containing a HashMap with city as key and ResultRow as value.
|
* Represents result containing a HashMap with city as key and ResultRow as value.
|
||||||
*/
|
*/
|
||||||
private static class Result {
|
private static class Result {
|
||||||
|
public void addStation(int hash, int value) {
|
||||||
|
resultMap.put(hash, new StationData(value));
|
||||||
|
}
|
||||||
|
|
||||||
|
public StationData getData(int hash) {
|
||||||
|
return resultMap.get(hash);
|
||||||
|
}
|
||||||
|
|
||||||
private static class StationData {
|
private static class StationData {
|
||||||
private int min, sum, count, max;
|
private int min, sum, count, max;
|
||||||
|
|
||||||
@ -63,28 +78,16 @@ public class CalculateAverage_adriacabeza {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<String, StationData> resultMap;
|
private final Map<Integer, StationData> resultMap;
|
||||||
|
|
||||||
public Result() {
|
public Result() {
|
||||||
this.resultMap = new HashMap<>();
|
this.resultMap = new HashMap<>(10_000, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Map<String, StationData> getResultMap() {
|
public Map<Integer, StationData> getResultMap() {
|
||||||
return resultMap;
|
return resultMap;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addMeasurement(String city, int value) {
|
|
||||||
resultMap.compute(city, (_, resultRow) -> {
|
|
||||||
if (resultRow == null) {
|
|
||||||
return new StationData(value);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
resultRow.update(value);
|
|
||||||
return resultRow;
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
public void merge(Result other) {
|
public void merge(Result other) {
|
||||||
other.getResultMap().forEach((city, resultRow) -> resultMap.merge(city, resultRow, (existing, incoming) -> {
|
other.getResultMap().forEach((city, resultRow) -> resultMap.merge(city, resultRow, (existing, incoming) -> {
|
||||||
existing.min = Math.min(existing.min, incoming.min);
|
existing.min = Math.min(existing.min, incoming.min);
|
||||||
@ -96,9 +99,9 @@ public class CalculateAverage_adriacabeza {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return this.resultMap.entrySet().stream()
|
return this.resultMap.entrySet().parallelStream()
|
||||||
.sorted(Map.Entry.comparingByKey())
|
.map(entry -> "%s=%s".formatted(cityMap.get(entry.getKey()), entry.getValue()))
|
||||||
.map(entry -> "%s=%s".formatted(entry.getKey(), entry.getValue()))
|
.sorted(Comparator.comparing(s -> s.split("=")[0]))
|
||||||
.collect(Collectors.joining(", ", "{", "}"));
|
.collect(Collectors.joining(", ", "{", "}"));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -155,6 +158,21 @@ public class CalculateAverage_adriacabeza {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static int readNumberFromBuffer(ByteBuffer buffer, int limit) {
|
||||||
|
var number = 0;
|
||||||
|
var sign = 1;
|
||||||
|
while (buffer.position() < limit) {
|
||||||
|
var numberByte = buffer.get();
|
||||||
|
if (numberByte == '-')
|
||||||
|
sign = -1;
|
||||||
|
else if (numberByte == '\n')
|
||||||
|
break;
|
||||||
|
else if (numberByte != '.')
|
||||||
|
number = number * 10 + (numberByte - '0');
|
||||||
|
}
|
||||||
|
return sign * number;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates average measurements from the file.
|
* Calculates average measurements from the file.
|
||||||
*
|
*
|
||||||
@ -167,28 +185,31 @@ public class CalculateAverage_adriacabeza {
|
|||||||
Result partialResult = new Result();
|
Result partialResult = new Result();
|
||||||
var limit = buffer.limit();
|
var limit = buffer.limit();
|
||||||
var field = new byte[CITY_NAME_MAX_CHARACTERS];
|
var field = new byte[CITY_NAME_MAX_CHARACTERS];
|
||||||
|
Set<Integer> seenHashes = new HashSet<>(10_000, 1);
|
||||||
while (buffer.position() < limit) {
|
while (buffer.position() < limit) {
|
||||||
var fieldCurrentIndex = 0;
|
var fieldCurrentIndex = 0;
|
||||||
field[fieldCurrentIndex++] = buffer.get();
|
var fieldByte = buffer.get();
|
||||||
|
field[fieldCurrentIndex++] = fieldByte;
|
||||||
|
// implement djb2 hash: https://theartincode.stanis.me/008-djb2/
|
||||||
|
int hash = DJB2_INIT;
|
||||||
while (buffer.position() < limit) {
|
while (buffer.position() < limit) {
|
||||||
var fieldByte = buffer.get();
|
// hash = hash * 33 + fieldByte
|
||||||
|
hash = (((hash << 5) + hash) + fieldByte);
|
||||||
|
fieldByte = buffer.get();
|
||||||
if (fieldByte == ';')
|
if (fieldByte == ';')
|
||||||
break;
|
break;
|
||||||
field[fieldCurrentIndex++] = fieldByte;
|
field[fieldCurrentIndex++] = fieldByte;
|
||||||
}
|
}
|
||||||
var fieldStr = new String(field, 0, fieldCurrentIndex);
|
|
||||||
var number = 0;
|
var number = readNumberFromBuffer(buffer, limit);
|
||||||
var sign = 1;
|
if (!seenHashes.contains(hash)) {
|
||||||
while (buffer.position() < limit) {
|
seenHashes.add(hash);
|
||||||
var numberByte = buffer.get();
|
cityMap.put(hash, new String(field, 0, fieldCurrentIndex));
|
||||||
if (numberByte == '-')
|
partialResult.addStation(hash, number);
|
||||||
sign = -1;
|
}
|
||||||
else if (numberByte == '\n')
|
else {
|
||||||
break;
|
partialResult.getData(hash).update(number);
|
||||||
else if (numberByte != '.')
|
|
||||||
number = number * 10 + (numberByte - '0');
|
|
||||||
}
|
}
|
||||||
partialResult.addMeasurement(fieldStr, sign * number);
|
|
||||||
}
|
}
|
||||||
return partialResult;
|
return partialResult;
|
||||||
}).reduce(new Result(), (partialResult1, partialResult2) -> {
|
}).reduce(new Result(), (partialResult1, partialResult2) -> {
|
||||||
|
Loading…
Reference in New Issue
Block a user