CalculateAverage_AbstractKamen
* initial commit * first attempt: segment the file and process it in parallel * remove commented stuff * custom parseDouble for this simple case * fixed some issues and improved parsing * format * Update calculate_average_AbstractKamen.sh --------- Co-authored-by: Gunnar Morling <gunnar.morling@googlemail.com>
This commit is contained in:
parent
209e005461
commit
7483b90cec
23
calculate_average_AbstractKamen.sh
Normal file
23
calculate_average_AbstractKamen.sh
Normal file
@ -0,0 +1,23 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Uncomment below to use sdk
|
||||
# source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||
# sdk use java 21.0.1-graal 1>&2
|
||||
|
||||
# Extra JVM flags for this entry; none are set.
JAVA_OPTS=""

# ${JAVA_OPTS} is left unquoted on purpose so that, when it holds several flags,
# the shell splits them into separate arguments.
java ${JAVA_OPTS} -cp target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_AbstractKamen
|
@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.channels.FileChannel.MapMode;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.function.Supplier;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.Stream;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
public class CalculateAverage_AbstractKamen {
|
||||
|
||||
private static final String FILE = "./measurements.txt";
|
||||
|
||||
private static class Measurement {
|
||||
private int min = Integer.MAX_VALUE;
|
||||
private int max = Integer.MIN_VALUE;
|
||||
private int sum;
|
||||
private long count;
|
||||
|
||||
public String toString() {
|
||||
return round(min / 10.0) + "/" + round(sum / 10.0 / count) + "/" + round(max / 10.0);
|
||||
}
|
||||
|
||||
private double round(double value) {
|
||||
return Math.round(value * 10.0) / 10.0;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
try (final FileChannel fc = FileChannel.open(Paths.get(FILE), StandardOpenOption.READ);
|
||||
final RandomAccessFile raf = new RandomAccessFile(new File(FILE), "r")) {
|
||||
final Map<String, Measurement> res = getParallelBufferStream(raf, fc)
|
||||
.map(CalculateAverage_AbstractKamen::getMeasurements)
|
||||
.flatMap(m -> m.entrySet().stream())
|
||||
.collect(Collectors.collectingAndThen(
|
||||
Collectors.toMap(Map.Entry::getKey,
|
||||
Map.Entry::getValue,
|
||||
CalculateAverage_AbstractKamen::aggregateMeasurements),
|
||||
TreeMap::new));
|
||||
System.out.println(res);
|
||||
}
|
||||
}
|
||||
|
||||
private static Measurement aggregateMeasurements(Measurement src, Measurement target) {
|
||||
target.min = Math.min(src.min, target.min);
|
||||
target.max = Math.max(src.max, target.max);
|
||||
target.sum = src.sum + target.sum;
|
||||
target.count = src.count + target.count;
|
||||
return target;
|
||||
}
|
||||
|
||||
private static Map<String, Measurement> getMeasurements(BufferSupplier getBuffer) {
|
||||
final Map<String, Measurement> map = new HashMap<>(50_000);
|
||||
final ByteBuffer byteBuffer = getBuffer.get();
|
||||
final byte[] bytes = new byte[512];
|
||||
while (byteBuffer.hasRemaining()) {
|
||||
int nameLen = 0;
|
||||
String name;
|
||||
byte b;
|
||||
while ((b = byteBuffer.get()) != ';') {
|
||||
bytes[nameLen++] = b;
|
||||
}
|
||||
name = new String(bytes, 0, nameLen, StandardCharsets.UTF_8);
|
||||
int valueLen = 0;
|
||||
int neg = 1;
|
||||
while (byteBuffer.hasRemaining() && ((b = byteBuffer.get()) != '\n')) {
|
||||
if (b == '-') {
|
||||
neg = -1;
|
||||
}
|
||||
else if (b == '.' || b == '\r') {
|
||||
// skip the dot and retart char
|
||||
}
|
||||
else {
|
||||
bytes[valueLen++] = b;
|
||||
}
|
||||
}
|
||||
final int val = parseAsInt(valueLen, bytes);
|
||||
takeMeasurement(val * neg, map, name);
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
private static int parseAsInt(int valueLen, byte[] bytes) {
|
||||
int val;
|
||||
switch (valueLen) {
|
||||
case 2 -> val = (bytes[0] - 48) * 10 + (bytes[1] - 48);
|
||||
case 3 -> val = (bytes[0] - 48) * 100 + (bytes[1] - 48) * 10 + (bytes[2] - 48);
|
||||
default -> val = 0;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
private static void takeMeasurement(int temperature, Map<String, Measurement> map, String name) {
|
||||
Measurement measurement = map.get(name);
|
||||
if (measurement != null) {
|
||||
measurement.min = Math.min(measurement.min, temperature);
|
||||
measurement.max = Math.max(measurement.max, temperature);
|
||||
measurement.sum += temperature;
|
||||
measurement.count++;
|
||||
}
|
||||
else {
|
||||
measurement = new Measurement();
|
||||
map.put(name, measurement);
|
||||
measurement.min = temperature;
|
||||
measurement.max = temperature;
|
||||
measurement.sum = temperature;
|
||||
measurement.count = 1;
|
||||
}
|
||||
}
|
||||
|
||||
private static Stream<BufferSupplier> getParallelBufferStream(RandomAccessFile raf, FileChannel fc) throws IOException {
|
||||
final int availableProcessors = Runtime.getRuntime().availableProcessors();
|
||||
return StreamSupport.stream(
|
||||
StreamSupport.stream(
|
||||
Spliterators.spliterator(
|
||||
new BufferSupplierIterator(raf, fc, availableProcessors), availableProcessors,
|
||||
Spliterator.IMMUTABLE | Spliterator.SIZED | Spliterator.SUBSIZED),
|
||||
false)
|
||||
.spliterator(),
|
||||
true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * A {@link Supplier} of a {@link ByteBuffer}; gives the chunk suppliers used in this file
 * a dedicated type name. It declares no members of its own, so {@link Supplier#get()} is
 * its single abstract method.
 */
@FunctionalInterface
interface BufferSupplier extends Supplier<ByteBuffer> {
}
|
||||
|
||||
class BufferSupplierIterator implements Iterator<BufferSupplier> {
|
||||
private long start;
|
||||
private final RandomAccessFile raf;
|
||||
private final FileChannel fc;
|
||||
private final long fileLength;
|
||||
private final long chunkSize;
|
||||
|
||||
public BufferSupplierIterator(RandomAccessFile raf, FileChannel fc, int numberOfParts) throws IOException {
|
||||
this.raf = raf;
|
||||
this.fc = fc;
|
||||
this.fileLength = fc.size();
|
||||
this.chunkSize = Math.min(fileLength / numberOfParts, 1073741824);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return start < fileLength;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BufferSupplier next() {
|
||||
try {
|
||||
if (hasNext()) {
|
||||
final long end = getEnd();
|
||||
long s = start;
|
||||
this.start = end;
|
||||
return getBufferSupplier(s, end);
|
||||
}
|
||||
else {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private long getEnd() throws IOException {
|
||||
long end = Math.min(start + chunkSize, fileLength);
|
||||
while (end < fileLength) {
|
||||
raf.seek(end++);
|
||||
if (raf.read() == '\n')
|
||||
break;
|
||||
}
|
||||
return end;
|
||||
}
|
||||
|
||||
private BufferSupplier getBufferSupplier(long position, long end) {
|
||||
final long size = end - position;
|
||||
return new BufferSupplier() {
|
||||
|
||||
private ByteBuffer bb;
|
||||
|
||||
@Override
|
||||
public ByteBuffer get() {
|
||||
try {
|
||||
if (bb == null) {
|
||||
return (bb = fc.map(MapMode.READ_ONLY, position, size));
|
||||
}
|
||||
else {
|
||||
return bb;
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user