* Use graal * Use dynamic cores computer * Use stream API to cleanup code * Use max processors * Use hash to avoid init string * optimize concurrentmap init * Smaller hash size * Avoid checking concurrentmap * Optimize data type * string dedup * Faster write * Change base * Remove time * Use mul instead of div
This commit is contained in:
parent
45056e073b
commit
dc49249d36
@ -19,5 +19,5 @@
|
|||||||
# source "$HOME/.sdkman/bin/sdkman-init.sh"
|
# source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||||
# sdk use java 21.0.1-graal 1>&2
|
# sdk use java 21.0.1-graal 1>&2
|
||||||
|
|
||||||
JAVA_OPTS=""
|
JAVA_OPTS="-XX:+UseStringDeduplication"
|
||||||
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gnabyl
|
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_gnabyl
|
||||||
|
20
prepare_gnabyl.sh
Executable file
20
prepare_gnabyl.sh
Executable file
@ -0,0 +1,20 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Copyright 2023 The original authors
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
|
||||||
|
# Uncomment below to use sdk
|
||||||
|
source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||||
|
sdk use java 21.0.1-graal 1>&2
|
@ -16,25 +16,30 @@
|
|||||||
package dev.morling.onebrc;
|
package dev.morling.onebrc;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.MappedByteBuffer;
|
import java.nio.MappedByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.concurrent.CompletableFuture;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
public class CalculateAverage_gnabyl {
|
public class CalculateAverage_gnabyl {
|
||||||
|
|
||||||
private static final String FILE = "./measurements.txt";
|
private static final String FILE = "./measurements.txt";
|
||||||
|
|
||||||
private static final int NB_CHUNKS = 8;
|
private static final int NB_CHUNKS = Runtime.getRuntime().availableProcessors();
|
||||||
|
|
||||||
private static record Chunk(long start, int bytesCount, MappedByteBuffer mappedByteBuffer) {
|
private static Map<Integer, String> stationNameMap = new ConcurrentHashMap<>(10000, 0.9f, NB_CHUNKS);
|
||||||
|
|
||||||
|
private static record Chunk(int bytesCount, MappedByteBuffer mappedByteBuffer) {
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int reduceSizeToFitLineBreak(FileChannel channel, long startPosition, int startSize)
|
private static int reduceSizeToFitLineBreak(FileChannel channel, long startPosition, int startSize)
|
||||||
@ -61,9 +66,9 @@ public class CalculateAverage_gnabyl {
|
|||||||
return realSize;
|
return realSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static List<Chunk> readChunks(long nbChunks) throws IOException {
|
private static List<Chunk> readChunks(int nbChunks) throws IOException {
|
||||||
RandomAccessFile file = new RandomAccessFile(FILE, "rw");
|
RandomAccessFile file = new RandomAccessFile(FILE, "rw");
|
||||||
List<Chunk> res = new ArrayList<>();
|
List<Chunk> res = new ArrayList<>(nbChunks);
|
||||||
FileChannel channel = file.getChannel();
|
FileChannel channel = file.getChannel();
|
||||||
long bytesCount = channel.size();
|
long bytesCount = channel.size();
|
||||||
long bytesPerChunk = bytesCount / nbChunks;
|
long bytesPerChunk = bytesCount / nbChunks;
|
||||||
@ -71,16 +76,18 @@ public class CalculateAverage_gnabyl {
|
|||||||
// Memory map the file in read-only mode
|
// Memory map the file in read-only mode
|
||||||
// TODO: Optimize using threads
|
// TODO: Optimize using threads
|
||||||
long currentPosition = 0;
|
long currentPosition = 0;
|
||||||
|
int startSize;
|
||||||
|
int realSize;
|
||||||
for (int i = 0; i < nbChunks; i++) {
|
for (int i = 0; i < nbChunks; i++) {
|
||||||
int startSize = (int) bytesPerChunk;
|
startSize = (int) bytesPerChunk;
|
||||||
int realSize = startSize;
|
realSize = startSize;
|
||||||
|
|
||||||
if (i == nbChunks - 1) {
|
if (i == nbChunks - 1) {
|
||||||
realSize = (int) (bytesCount - currentPosition);
|
realSize = (int) (bytesCount - currentPosition);
|
||||||
MappedByteBuffer mappedByteBuffer = channel.map(FileChannel.MapMode.READ_ONLY, currentPosition,
|
MappedByteBuffer mappedByteBuffer = channel.map(FileChannel.MapMode.READ_ONLY, currentPosition,
|
||||||
realSize);
|
realSize);
|
||||||
|
|
||||||
res.add(new Chunk(currentPosition, realSize, mappedByteBuffer));
|
res.add(new Chunk(realSize, mappedByteBuffer));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,7 +97,7 @@ public class CalculateAverage_gnabyl {
|
|||||||
MappedByteBuffer mappedByteBuffer = channel.map(FileChannel.MapMode.READ_ONLY, currentPosition,
|
MappedByteBuffer mappedByteBuffer = channel.map(FileChannel.MapMode.READ_ONLY, currentPosition,
|
||||||
realSize);
|
realSize);
|
||||||
|
|
||||||
res.add(new Chunk(currentPosition, realSize, mappedByteBuffer));
|
res.add(new Chunk(realSize, mappedByteBuffer));
|
||||||
currentPosition += realSize;
|
currentPosition += realSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -101,32 +108,32 @@ public class CalculateAverage_gnabyl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static class StationData {
|
private static class StationData {
|
||||||
private double sum, min, max;
|
private float sum, min, max;
|
||||||
private long count;
|
private int count;
|
||||||
|
|
||||||
public StationData(double value) {
|
public StationData(float value) {
|
||||||
this.count = 1;
|
this.count = 1;
|
||||||
this.sum = value;
|
this.sum = value;
|
||||||
this.min = value;
|
this.min = value;
|
||||||
this.max = value;
|
this.max = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void update(double value) {
|
public void update(float value) {
|
||||||
this.count++;
|
this.count++;
|
||||||
this.sum += value;
|
this.sum += value;
|
||||||
this.min = Math.min(this.min, value);
|
this.min = Math.min(this.min, value);
|
||||||
this.max = Math.max(this.max, value);
|
this.max = Math.max(this.max, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getMean() {
|
public float getMean() {
|
||||||
return sum / count;
|
return sum / count;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getMin() {
|
public float getMin() {
|
||||||
return min;
|
return min;
|
||||||
}
|
}
|
||||||
|
|
||||||
public double getMax() {
|
public float getMax() {
|
||||||
return max;
|
return max;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -139,47 +146,44 @@ public class CalculateAverage_gnabyl {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static double round(double value) {
|
static float round(float value) {
|
||||||
return Math.round(value * 10.0) / 10.0;
|
return Math.round(value * 10.0f) * 0.1f;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class ChunkResult {
|
private static class ChunkResult {
|
||||||
private Map<String, StationData> data;
|
private Map<Integer, StationData> data;
|
||||||
|
|
||||||
public ChunkResult() {
|
public ChunkResult() {
|
||||||
data = new HashMap<>();
|
data = new HashMap<>();
|
||||||
}
|
}
|
||||||
|
|
||||||
public StationData getData(String name) {
|
public StationData getData(int hash) {
|
||||||
return data.get(name);
|
return data.get(hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void addStation(String name, double value) {
|
public void addStation(int hash, float value) {
|
||||||
this.data.put(name, new StationData(value));
|
this.data.put(hash, new StationData(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void print() {
|
public void print() {
|
||||||
var stationNames = new ArrayList<String>(this.data.keySet());
|
PrintWriter out = new PrintWriter(System.out);
|
||||||
Collections.sort(stationNames);
|
out.println(
|
||||||
System.out.print("{");
|
this.data.keySet().parallelStream()
|
||||||
for (int i = 0; i < stationNames.size() - 1; i++) {
|
.map(hash -> {
|
||||||
var name = stationNames.get(i);
|
var stationData = data.get(hash);
|
||||||
var stationData = data.get(name);
|
var name = stationNameMap.get(hash);
|
||||||
System.out.printf("%s=%.1f/%.1f/%.1f, ", name, round(stationData.getMin()),
|
return String.format("%s=%.1f/%.1f/%.1f", name, round(stationData.getMin()),
|
||||||
round(stationData.getMean()),
|
round(stationData.getMean()),
|
||||||
round(stationData.getMax()));
|
round(stationData.getMax()));
|
||||||
}
|
})
|
||||||
var name = stationNames.get(stationNames.size() - 1);
|
.sorted((a, b) -> a.split("=")[0].compareTo(b.split("=")[0]))
|
||||||
var stationData = data.get(name);
|
.collect(Collectors.joining(", ", "{", "}")));
|
||||||
System.out.printf("%s=%.1f/%.1f/%.1f", name, round(stationData.getMin()),
|
out.flush();
|
||||||
round(stationData.getMean()),
|
|
||||||
round(stationData.getMax()));
|
|
||||||
System.out.println("}");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void mergeWith(ChunkResult other) {
|
public void mergeWith(ChunkResult other) {
|
||||||
for (Map.Entry<String, StationData> entry : other.data.entrySet()) {
|
for (Map.Entry<Integer, StationData> entry : other.data.entrySet()) {
|
||||||
String stationName = entry.getKey();
|
int stationName = entry.getKey();
|
||||||
StationData otherStationData = entry.getValue();
|
StationData otherStationData = entry.getValue();
|
||||||
StationData thisStationData = this.data.get(stationName);
|
StationData thisStationData = this.data.get(stationName);
|
||||||
|
|
||||||
@ -201,16 +205,22 @@ public class CalculateAverage_gnabyl {
|
|||||||
chunk.mappedByteBuffer().get(data);
|
chunk.mappedByteBuffer().get(data);
|
||||||
|
|
||||||
// Process each line
|
// Process each line
|
||||||
String stationName;
|
float value;
|
||||||
double value;
|
|
||||||
int iSplit, iEol;
|
int iSplit, iEol;
|
||||||
StationData stationData;
|
StationData stationData;
|
||||||
long negative;
|
int negative;
|
||||||
|
int hash, prime = 31;
|
||||||
|
Set<Integer> seenHashes = new HashSet<>(10000, 0.9f);
|
||||||
for (int offset = 0; offset < data.length; offset++) {
|
for (int offset = 0; offset < data.length; offset++) {
|
||||||
// Find station name
|
// Find station name
|
||||||
|
hash = 0;
|
||||||
for (iSplit = offset; data[iSplit] != ';'; iSplit++) {
|
for (iSplit = offset; data[iSplit] != ';'; iSplit++) {
|
||||||
|
hash = (hash << 5) - hash + (data[iSplit] & 0xFF);
|
||||||
|
}
|
||||||
|
if (!seenHashes.contains(hash)) {
|
||||||
|
seenHashes.add(hash);
|
||||||
|
stationNameMap.put(hash, new String(data, offset, iSplit - offset, StandardCharsets.UTF_8));
|
||||||
}
|
}
|
||||||
stationName = new String(data, offset, iSplit - offset, StandardCharsets.UTF_8);
|
|
||||||
|
|
||||||
// Find value
|
// Find value
|
||||||
iSplit++;
|
iSplit++;
|
||||||
@ -222,7 +232,7 @@ public class CalculateAverage_gnabyl {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (data[iEol] == '.') {
|
if (data[iEol] == '.') {
|
||||||
value = value + (data[iEol + 1] - 48) / 10.0;
|
value = value + (data[iEol + 1] - 48) * 0.1f;
|
||||||
iEol += 2;
|
iEol += 2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -231,10 +241,10 @@ public class CalculateAverage_gnabyl {
|
|||||||
value *= negative;
|
value *= negative;
|
||||||
|
|
||||||
// Init & count
|
// Init & count
|
||||||
stationData = result.getData(stationName);
|
stationData = result.getData(hash);
|
||||||
|
|
||||||
if (stationData == null) {
|
if (stationData == null) {
|
||||||
result.addStation(stationName, value);
|
result.addStation(hash, value);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
stationData.update(value);
|
stationData.update(value);
|
||||||
@ -247,32 +257,8 @@ public class CalculateAverage_gnabyl {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static ChunkResult processAllChunks(List<Chunk> chunks) throws InterruptedException, ExecutionException {
|
private static ChunkResult processAllChunks(List<Chunk> chunks) throws InterruptedException, ExecutionException {
|
||||||
// var globalRes = new ChunkResult();
|
return chunks.parallelStream().map(CalculateAverage_gnabyl::processChunk).collect(ChunkResult::new,
|
||||||
// for (var chunk : chunks) {
|
ChunkResult::mergeWith, ChunkResult::mergeWith);
|
||||||
// var chunkRes = processChunk(chunk);
|
|
||||||
// globalRes.mergeWith(chunkRes);
|
|
||||||
// }
|
|
||||||
// return globalRes;
|
|
||||||
|
|
||||||
List<CompletableFuture<ChunkResult>> computeTasks = new ArrayList<>();
|
|
||||||
|
|
||||||
for (Chunk chunk : chunks) {
|
|
||||||
computeTasks.add(CompletableFuture.supplyAsync(() -> processChunk(chunk)));
|
|
||||||
}
|
|
||||||
|
|
||||||
ChunkResult globalRes = null;
|
|
||||||
|
|
||||||
for (CompletableFuture<ChunkResult> completedTask : computeTasks) {
|
|
||||||
ChunkResult chunkRes = completedTask.get();
|
|
||||||
if (globalRes == null) {
|
|
||||||
globalRes = completedTask.get();
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
globalRes.mergeWith(chunkRes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return globalRes;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
|
public static void main(String[] args) throws IOException, InterruptedException, ExecutionException {
|
||||||
|
Loading…
Reference in New Issue
Block a user