Implementation using a memory-mapped file
This commit is contained in:
parent
5e80d8a7b0
commit
6b13d52b67
20
calculate_average_bjhara.sh
Normal file
20
calculate_average_bjhara.sh
Normal file
@ -0,0 +1,20 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
# No extra JVM flags are passed for this entry; add any to JAVA_OPTS.
# $JAVA_OPTS is left unquoted so that multiple flags split into separate arguments.
JAVA_OPTS=""
time java $JAVA_OPTS \
    --class-path target/average-1.0.0-SNAPSHOT.jar \
    dev.morling.onebrc.CalculateAverage_bjhara
|
151
src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java
Normal file
151
src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java
Normal file
@ -0,0 +1,151 @@
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.*;
|
||||
import java.nio.*;
|
||||
import java.nio.channels.*;
|
||||
import java.nio.file.*;
|
||||
import java.util.*;
|
||||
import java.util.stream.*;
|
||||
|
||||
/**
 * Reads {@code ./measurements.txt}, where every line has the form {@code name;value}, and prints
 * the minimum, mean and maximum value per name, sorted by name.
 *
 * <p>The file is memory mapped in chunks of at most {@link #CHUNK_SIZE} bytes. Every chunk ends on
 * a line boundary, is parsed into its own map, and the per-chunk maps are merged at the end.
 */
public class CalculateAverage_bjhara {
    private static final String FILE = "./measurements.txt";

    /** Upper bound, in bytes, for a single memory-mapped chunk. */
    private static final long CHUNK_SIZE = 1024 * 1024 * 10L;

    /** Size of the scratch array used to copy one field (name or value) out of a chunk. */
    private static final int FIELD_BUFFER_SIZE = 128;

    /** Running minimum, maximum, sum and count of the values seen for one name. */
    private static final class Measurement {
        private double min = Double.POSITIVE_INFINITY;
        private double max = Double.NEGATIVE_INFINITY;
        private double sum;
        private long count;

        /** Formats the aggregate as {@code min/mean/max}, each rounded to one decimal. */
        @Override
        public String toString() {
            return round(min) + "/" + round(sum / count) + "/" + round(max);
        }

        private double round(double value) {
            return Math.round(value * 10.0) / 10.0;
        }

        /** Folds a single value into this aggregate. */
        private void add(double value) {
            min = Math.min(min, value);
            max = Math.max(max, value);
            sum += value;
            count++;
        }

        /**
         * Merges two aggregates into a new one; neither argument is modified.
         *
         * @param m1 first aggregate
         * @param m2 second aggregate
         * @return a new aggregate covering the values of both arguments
         */
        public static Measurement combine(Measurement m1, Measurement m2) {
            var mres = new Measurement();
            mres.min = Math.min(m1.min, m2.min);
            mres.max = Math.max(m1.max, m2.max);
            mres.sum = m1.sum + m2.sum;
            mres.count = m1.count + m2.count;
            return mres;
        }
    }

    /**
     * Entry point: aggregates {@code ./measurements.txt} and prints the sorted result.
     *
     * @param args ignored
     * @throws IOException if the file cannot be opened or its size cannot be read
     */
    public static void main(String[] args) throws IOException {
        try (FileChannel fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
            // An empty file yields no chunks; fall back to an empty map so "{}" is printed
            // instead of failing with NoSuchElementException.
            Map<String, Measurement> cities = splitFileChannel(fileChannel)
                    .parallel()
                    .map(CalculateAverage_bjhara::parseBuffer)
                    .reduce(CalculateAverage_bjhara::combineMaps)
                    .orElseGet(HashMap::new);

            System.out.println(new TreeMap<>(cities));
        }
    }

    /**
     * Merges every entry of {@code map2} into {@code map1}.
     *
     * @param map1 target map; it is modified and returned
     * @param map2 map whose entries are merged into {@code map1}
     * @return {@code map1}
     */
    private static Map<String, Measurement> combineMaps(Map<String, Measurement> map1,
                                                        Map<String, Measurement> map2) {
        for (var entry : map2.entrySet()) {
            map1.merge(entry.getKey(), entry.getValue(), Measurement::combine);
        }

        return map1;
    }

    /**
     * Exposes the file as a sequential stream of read-only mapped chunks that each end on a line
     * boundary. Chunks are mapped one at a time as the stream asks for them.
     *
     * @param fileChannel channel opened for reading; must stay open while the stream is consumed
     * @return stream of chunks covering the whole file in order
     * @throws IOException if the size of the channel cannot be determined
     */
    private static Stream<ByteBuffer> splitFileChannel(final FileChannel fileChannel) throws IOException {
        return StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(new ChunkIterator(fileChannel), Spliterator.IMMUTABLE),
                false);
    }

    /** Walks the file front to back, handing out one mapped chunk per call to {@link #next()}. */
    private static final class ChunkIterator implements Iterator<ByteBuffer> {
        private final FileChannel fileChannel;
        private final long size;
        private long start = 0;

        ChunkIterator(FileChannel fileChannel) throws IOException {
            this.fileChannel = fileChannel;
            this.size = fileChannel.size();
        }

        @Override
        public boolean hasNext() {
            return start < size;
        }

        @Override
        public ByteBuffer next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            try {
                final long length = Math.min(CHUNK_SIZE, size - start);
                MappedByteBuffer chunk = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, length);

                // Only a chunk that is followed by more data may end in the middle of a line;
                // cut it back to just after the closest previous newline. The last chunk is kept
                // whole so that a final line without a trailing newline is not lost.
                if (start + length < size) {
                    int lastNewline = chunk.limit() - 1;
                    while (lastNewline >= 0 && chunk.get(lastNewline) != '\n') {
                        lastNewline--;
                    }
                    if (lastNewline < 0) {
                        throw new IllegalStateException(
                                "No newline within " + length + " bytes starting at file offset " + start);
                    }
                    chunk.limit(lastNewline + 1);
                }

                start += chunk.limit();
                return chunk;
            }
            catch (IOException ex) {
                throw new UncheckedIOException(ex);
            }
        }
    }

    /**
     * Parses every {@code name;value} line between the buffer's position and its limit.
     *
     * @param bb chunk that starts at the beginning of a line; its position is advanced to its limit
     * @return aggregates per name for this chunk
     * @throws IllegalArgumentException if a line contains no {@code ';'}
     * @throws NumberFormatException if a value is not a valid double
     */
    private static Map<String, Measurement> parseBuffer(ByteBuffer bb) {
        Map<String, Measurement> cities = new HashMap<>();

        final int limit = bb.limit();
        // Scratch space reused for both fields of every line.
        final byte[] buffer = new byte[FIELD_BUFFER_SIZE];

        while (bb.position() < limit) {
            final int lineStart = bb.position();

            // find the ; separator
            int separator = lineStart;
            while (separator != limit && bb.get(separator) != ';') {
                separator++;
            }
            if (separator == limit) {
                throw new IllegalArgumentException(
                        "Missing ';' in line starting at buffer offset " + lineStart);
            }

            // find the end of the value: the first whitespace byte, or the end of the chunk
            int end = separator + 1;
            while (end != limit && !Character.isWhitespace((char) bb.get(end))) {
                end++;
            }

            // get the name as a string; decode as UTF-8 regardless of the platform default
            int nameLength = separator - lineStart;
            bb.get(buffer, 0, nameLength);
            String city = new String(buffer, 0, nameLength, java.nio.charset.StandardCharsets.UTF_8);

            // get rid of the separator
            bb.get();

            // get the double value
            int valueLength = end - separator - 1;
            bb.get(buffer, 0, valueLength);
            String valueStr = new String(buffer, 0, valueLength, java.nio.charset.StandardCharsets.UTF_8);
            double value = Double.parseDouble(valueStr);

            // Consume the line terminator if there is one: the very last line of the file may
            // lack it. After '\r' only swallow the next byte when it really is '\n'.
            if (bb.hasRemaining()) {
                byte terminator = bb.get();
                if (terminator == '\r' && bb.hasRemaining() && bb.get(bb.position()) == '\n') {
                    bb.get();
                }
            }

            // update the map with the new measurement
            cities.computeIfAbsent(city, key -> new Measurement()).add(value);
        }

        return cities;
    }
}
|
Loading…
Reference in New Issue
Block a user