Mahadev virtual thread 1brc (#611)

* Read file with multiple virtual threads and process chunks of file data in parallel.

* Updated logic to bucket every chunk of aggs into a vector and merge them into a TreeMap for printing.

* Virtual Thread / File Channels Impl.

* Renamed files with GHUsername.

* Added statement to get vals before updating.

* Added executable permission to the files.
This commit is contained in:
Mahadev K 2024-01-28 22:56:44 +05:30 committed by GitHub
parent f5bddafaf7
commit f598d74594
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 191 additions and 0 deletions

19
calculate_average_mahadev-k.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
JAVA_OPTS=""
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_mahadev_k

20
prepare_mahadev-k.sh Executable file
View File

@ -0,0 +1,20 @@
#!/bin/bash
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Uncomment below to use sdk
# source "$HOME/.sdkman/bin/sdkman-init.sh"
# sdk use java 21.0.1-graal 1>&2

View File

@ -0,0 +1,152 @@
/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.FileDescriptor;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.io.RandomAccessFile;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;
public class CalculateAverage_mahadev_k {
private static final String FILE = "./measurements.txt";
private static Map<String, MeasurementAggregator> stationMap = new ConcurrentSkipListMap<>();
private static double round(double value) {
return Math.round(value * 10.0) / 10.0;
}
private static class MeasurementAggregator {
double minima = Double.POSITIVE_INFINITY, maxima = Double.NEGATIVE_INFINITY, total = 0, count = 0;
public synchronized void accept(double value) {
if (minima > value)
minima = value;
if (maxima < value)
maxima = value;
total += value;
count++;
}
public double min() {
return round(minima);
}
public double max() {
return round(maxima);
}
public double avg() {
return round((Math.round(total * 10.0) / 10.0) / count);
}
}
public static void main(String[] args) throws IOException {
int chunkSize = args.length == 1 ? Integer.parseInt(args[0]) : 1_000_000;
readAndProcess(chunkSize);
print();
}
public static void readAndProcess(int chunkSize) {
final ThreadFactory factory = Thread.ofVirtual().name("routine-", 0).factory();
try (RandomAccessFile file = new RandomAccessFile(FILE, "r")) {
try (var executor = Executors.newThreadPerTaskExecutor(factory)) {
var channel = file.getChannel();
var size = channel.size();
long start = 0;
while (start <= size) {
long end = start + chunkSize;
String letter = "";
do {
end--;
ByteBuffer buffer = ByteBuffer.allocate(1);
channel.read(buffer, end);
buffer.flip();
letter = StandardCharsets.UTF_8.decode(buffer).toString();
} while (!letter.equals("\n"));
if (end < start)
end = start + chunkSize;
final long currentStart = start;
final long currentEnd = end;
executor.submit(() -> {
ByteBuffer buffer = ByteBuffer.allocate((int) (currentEnd - currentStart + 1));
try {
channel.read(buffer, currentStart);
}
catch (IOException e) {
e.printStackTrace();
}
buffer.flip();
String data = StandardCharsets.UTF_8.decode(buffer).toString();
processData(data);
});
start = end + 1;
}
}
}
catch (IOException e) {
e.printStackTrace();
}
}
public static void processData(String dataBlock) {
StringTokenizer tokenizer = new StringTokenizer(dataBlock, "\n");
while (tokenizer.hasMoreElements()) {
StringTokenizer tokens = new StringTokenizer(tokenizer.nextToken(), ";");
String station = tokens.nextToken();
double value = Double.parseDouble(tokens.nextToken());
processMinMaxMean(station, value);
}
}
private static void processMinMaxMean(String station, double temp) {
var values = stationMap.get(station);
if (values == null) {
values = new MeasurementAggregator();
stationMap.putIfAbsent(station, values);
}
values = stationMap.get(station);
values.accept(temp);
}
public static void print() throws UnsupportedEncodingException {
System.setOut(new PrintStream(new FileOutputStream(FileDescriptor.out), true, StandardCharsets.UTF_8));
System.out.print("{");
int i = stationMap.size();
for (var kv : stationMap.entrySet()) {
System.out.printf("%s=%s/%s/%s", kv.getKey(), kv.getValue().min(), kv.getValue().avg(), kv.getValue().max());
if (i > 1)
System.out.print(", ");
i--;
}
System.out.println("}");
}
}