Solution without unsafe (#507)
Co-authored-by: Giovanni Cuccu <gcuccu@imolainformatica.it>
This commit is contained in:
parent
f06de5faab
commit
2c1264def9
19
calculate_average_giovannicuccu.sh
Normal file
19
calculate_average_giovannicuccu.sh
Normal file
@ -0,0 +1,19 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# No extra JVM options are passed for this entry.
JAVA_OPTS=""

# JAVA_OPTS is expanded unquoted on purpose so that several options, if ever set,
# are split into separate arguments.
java ${JAVA_OPTS} -cp target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_giovannicuccu
|
20
prepare_giovannicuccu.sh
Normal file
20
prepare_giovannicuccu.sh
Normal file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Uncomment the two lines below to select the JDK through SDKMAN
|
||||
# source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||
# sdk use java 21.0.1-graal 1>&2
|
@ -0,0 +1,421 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import static java.util.stream.Collectors.*;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.*;
|
||||
|
||||
/*
|
||||
Solution without Unsafe that borrows ideas from spullara, thomaswue and royvanrijn
|
||||
*/
|
||||
|
||||
public class CalculateAverage_giovannicuccu {
|
||||
|
||||
/** Relative path of the measurements file that {@code main} hands to the reader. */
private static final String FILE = "./measurements.txt";
|
||||
|
||||
public static record PartitionBoundary(long start, long end) {
|
||||
}
|
||||
|
||||
public static interface PartitionCalculator {
|
||||
PartitionBoundary[] computePartitionsBoundaries(Path path);
|
||||
}
|
||||
|
||||
public static class ProcessorPartitionCalculator implements PartitionCalculator {
|
||||
|
||||
public PartitionBoundary[] computePartitionsBoundaries(Path path) {
|
||||
try {
|
||||
int numberOfSegments = Runtime.getRuntime().availableProcessors();
|
||||
long fileSize = path.toFile().length();
|
||||
long segmentSize = fileSize / numberOfSegments;
|
||||
PartitionBoundary[] segmentBoundaries = new PartitionBoundary[numberOfSegments];
|
||||
try (RandomAccessFile randomAccessFile = new RandomAccessFile(path.toFile(), "r")) {
|
||||
long segStart = 0;
|
||||
long segEnd = segmentSize;
|
||||
for (int i = 0; i < numberOfSegments; i++) {
|
||||
segEnd = findEndSegment(randomAccessFile, segEnd, fileSize);
|
||||
segmentBoundaries[i] = new PartitionBoundary(segStart, segEnd);
|
||||
segStart = segEnd;
|
||||
segEnd = Math.min(segEnd + segmentSize, fileSize);
|
||||
}
|
||||
}
|
||||
return segmentBoundaries;
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private long findEndSegment(RandomAccessFile raf, long location, long fileSize) throws IOException {
|
||||
raf.seek(location);
|
||||
while (location < fileSize) {
|
||||
location++;
|
||||
if (raf.read() == 10)
|
||||
break;
|
||||
}
|
||||
return location;
|
||||
}
|
||||
}
|
||||
|
||||
public static class MeasurementAggregator {
|
||||
private final int hash;
|
||||
private int min;
|
||||
private int max;
|
||||
private double sum;
|
||||
private long count;
|
||||
private final byte[] station;
|
||||
private final int offset;
|
||||
private final String name;
|
||||
|
||||
private final long[] data;
|
||||
private final int dataOffset;
|
||||
|
||||
public MeasurementAggregator(byte[] station, int offset, int hash, int initialValue, long[] data, int dataOffset) {
|
||||
min = initialValue;
|
||||
max = initialValue;
|
||||
sum = initialValue;
|
||||
count = 1;
|
||||
this.station = station;
|
||||
this.offset = offset;
|
||||
this.hash = hash;
|
||||
this.data = data;
|
||||
this.dataOffset = dataOffset;
|
||||
this.name = new String(station, 0, offset, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
public MeasurementAggregator(byte[] station, int offset, int hash, int initialValue) {
|
||||
min = initialValue;
|
||||
max = initialValue;
|
||||
sum = initialValue;
|
||||
count = 1;
|
||||
this.station = station;
|
||||
this.offset = offset;
|
||||
this.hash = hash;
|
||||
this.data = new long[0];
|
||||
this.dataOffset = 0;
|
||||
this.name = new String(station, 0, offset, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
public boolean hasSameStation(byte[] stationIn, int offsetIn) {
|
||||
return Arrays.equals(stationIn, 0, offsetIn, station, 0, offset);
|
||||
}
|
||||
|
||||
public boolean hasSameStation(long[] dataIn, int offsetIn) {
|
||||
return Arrays.equals(dataIn, 0, offsetIn, data, 0, dataOffset);
|
||||
}
|
||||
|
||||
public void add(int value) {
|
||||
if (value < min) {
|
||||
min = value;
|
||||
}
|
||||
if (value > max) {
|
||||
max = value;
|
||||
}
|
||||
sum += value;
|
||||
count++;
|
||||
}
|
||||
|
||||
public void merge(MeasurementAggregator other) {
|
||||
// System.out.println("min=" +min + " other min=" +other.min);
|
||||
min = Math.min(min, other.min);
|
||||
max = Math.max(max, other.max);
|
||||
sum += other.sum;
|
||||
count += other.count;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return round((double) min / 10) + "/" + round((sum / (double) count) / 10) + "/" + round((double) max / 10);
|
||||
}
|
||||
|
||||
private double round(double value) {
|
||||
return Math.round(value * 10.0) / 10.0;
|
||||
}
|
||||
|
||||
public int getMin() {
|
||||
return min;
|
||||
}
|
||||
|
||||
public int getHash() {
|
||||
return hash;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public byte[] getStation() {
|
||||
return station;
|
||||
}
|
||||
|
||||
public int getOffset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
public long[] getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static class MeasurementList {
|
||||
|
||||
private static final int SIZE = 1024 * 64;
|
||||
private final MeasurementAggregator[] measurements = new MeasurementAggregator[SIZE];
|
||||
|
||||
public void add(byte[] station, int offset, int hash, int value) {
|
||||
int index = hash & (SIZE - 1);
|
||||
if (measurements[index] == null) {
|
||||
measurements[index] = new MeasurementAggregator(station.clone(), offset, hash, value);
|
||||
}
|
||||
else {
|
||||
if (measurements[index].hasSameStation(station, offset)) {
|
||||
measurements[index].add(value);
|
||||
}
|
||||
else {
|
||||
while (measurements[index] != null && !measurements[index].hasSameStation(station, offset)) {
|
||||
index = (index + 1) & (SIZE - 1);
|
||||
}
|
||||
if (measurements[index] == null) {
|
||||
measurements[index] = new MeasurementAggregator(station.clone(), offset, hash, value);
|
||||
}
|
||||
else {
|
||||
measurements[index].add(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void merge(MeasurementAggregator measurementAggregator) {
|
||||
int index = (measurementAggregator.getHash() & (SIZE - 1));
|
||||
if (measurements[index] == null) {
|
||||
measurements[index] = measurementAggregator;
|
||||
}
|
||||
else {
|
||||
while (measurements[index] != null && !measurements[index].hasSameStation(measurementAggregator.getStation(), measurementAggregator.getOffset())) {
|
||||
index = (index + 1) & (SIZE - 1);
|
||||
}
|
||||
if (measurements[index] == null) {
|
||||
measurements[index] = measurementAggregator;
|
||||
}
|
||||
else {
|
||||
measurements[index].merge(measurementAggregator);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public MeasurementAggregator[] getMeasurements() {
|
||||
return measurements;
|
||||
}
|
||||
}
|
||||
|
||||
public static class MMapReader {
|
||||
private final Path path;
|
||||
private final PartitionBoundary[] boundaries;
|
||||
|
||||
private final boolean serial;
|
||||
|
||||
public MMapReader(Path path, PartitionCalculator partitionCalculator, boolean serial) {
|
||||
this.path = path;
|
||||
this.serial = serial;
|
||||
boundaries = partitionCalculator.computePartitionsBoundaries(path);
|
||||
}
|
||||
|
||||
public TreeMap<String, MeasurementAggregator> elaborate() {
|
||||
try (ExecutorService executor = Executors.newFixedThreadPool(boundaries.length)) {
|
||||
List<Future<MeasurementList>> futures = new ArrayList<>();
|
||||
for (PartitionBoundary boundary : boundaries) {
|
||||
if (serial) {
|
||||
FutureTask<MeasurementList> future = new FutureTask<>(() -> computeListForPartition(boundary.start(), boundary.end()));
|
||||
future.run();
|
||||
// System.out.println("done with partition " + boundary);
|
||||
futures.add(future);
|
||||
}
|
||||
else {
|
||||
Future<MeasurementList> future = executor.submit(() -> computeListForPartition(boundary.start(), boundary.end()));
|
||||
futures.add(future);
|
||||
}
|
||||
}
|
||||
TreeMap<String, MeasurementAggregator> ris = reduce(futures);
|
||||
return ris;
|
||||
}
|
||||
}
|
||||
|
||||
private TreeMap<String, MeasurementAggregator> reduce(List<Future<MeasurementList>> futures) {
|
||||
try {
|
||||
TreeMap<String, MeasurementAggregator> risMap = new TreeMap<>();
|
||||
MeasurementList ris = new MeasurementList();
|
||||
for (Future<MeasurementList> future : futures) {
|
||||
MeasurementList results = future.get();
|
||||
merge(ris, results);
|
||||
}
|
||||
for (MeasurementAggregator m : ris.getMeasurements()) {
|
||||
if (m != null) {
|
||||
risMap.put(m.getName(), m);
|
||||
}
|
||||
}
|
||||
return risMap;
|
||||
}
|
||||
catch (InterruptedException | ExecutionException ie) {
|
||||
System.err.println(ie);
|
||||
throw new RuntimeException(ie);
|
||||
}
|
||||
}
|
||||
|
||||
private void merge(MeasurementList result, MeasurementList partial) {
|
||||
for (MeasurementAggregator m : partial.getMeasurements()) {
|
||||
if (m != null) {
|
||||
result.merge(m);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private MeasurementList computeListForPartition(long start, long end) {
|
||||
MeasurementList list = new MeasurementList();
|
||||
try {
|
||||
try (FileChannel fileChannel = (FileChannel) Files.newByteChannel((path), StandardOpenOption.READ)) {
|
||||
MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, end - start);
|
||||
mappedByteBuffer.order(BYTE_ORDER.LITTLE_ENDIAN);
|
||||
int limit = mappedByteBuffer.limit();
|
||||
int startLine;
|
||||
byte[] stationb = new byte[100];
|
||||
while ((startLine = mappedByteBuffer.position()) < limit - 110) {
|
||||
int currentPosition = startLine;
|
||||
byte b = 0;
|
||||
int i = 0;
|
||||
int hash = 0;
|
||||
|
||||
while ((b = mappedByteBuffer.get(currentPosition++)) != ';') {
|
||||
stationb[i++] = b;
|
||||
hash = 31 * hash + b;
|
||||
}
|
||||
if (hash < 0) {
|
||||
hash = -hash;
|
||||
}
|
||||
|
||||
long numberWord = mappedByteBuffer.getLong(currentPosition);
|
||||
int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
|
||||
int value = convertIntoNumber(decimalSepPos, numberWord);
|
||||
mappedByteBuffer.position(currentPosition + (decimalSepPos >>> 3) + 3);
|
||||
|
||||
list.add(stationb, i, hash, value);
|
||||
|
||||
}
|
||||
while ((startLine = mappedByteBuffer.position()) < limit) {
|
||||
int currentPosition = startLine;
|
||||
byte b = 0;
|
||||
int i = 0;
|
||||
int hash = 0;
|
||||
while ((b = mappedByteBuffer.get(currentPosition++)) != ';') {
|
||||
stationb[i++] = b;
|
||||
hash = 31 * hash + b;
|
||||
}
|
||||
if (hash < 0) {
|
||||
hash = -hash;
|
||||
}
|
||||
|
||||
int value = 0;
|
||||
if (currentPosition <= limit - 8) {
|
||||
long numberWord = mappedByteBuffer.getLong(currentPosition);
|
||||
int decimalSepPos = Long.numberOfTrailingZeros(~numberWord & 0x10101000);
|
||||
value = convertIntoNumber(decimalSepPos, numberWord);
|
||||
mappedByteBuffer.position(currentPosition + (decimalSepPos >>> 3) + 3);
|
||||
}
|
||||
else {
|
||||
int sign = 1;
|
||||
b = mappedByteBuffer.get(currentPosition++);
|
||||
if (b == '-') {
|
||||
sign = -1;
|
||||
}
|
||||
else {
|
||||
value = b - '0';
|
||||
}
|
||||
while ((b = mappedByteBuffer.get(currentPosition++)) != '.') {
|
||||
value = value * 10 + (b - '0');
|
||||
}
|
||||
b = mappedByteBuffer.get(currentPosition);
|
||||
value = value * 10 + (b - '0');
|
||||
if (sign == -1) {
|
||||
value = -value;
|
||||
}
|
||||
mappedByteBuffer.position(currentPosition + 2);
|
||||
}
|
||||
|
||||
list.add(stationb, i, hash, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
System.out.println("Error");
|
||||
System.err.println(e);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
private static final ByteOrder BYTE_ORDER = ByteOrder.nativeOrder();
|
||||
|
||||
private static long getLongLittleEndian(long value) {
|
||||
value = Long.reverseBytes(value);
|
||||
return value;
|
||||
}
|
||||
|
||||
private static int convertIntoNumber(int decimalSepPos, long numberWord) {
|
||||
int shift = 28 - decimalSepPos;
|
||||
// signed is -1 if negative, 0 otherwise
|
||||
long signed = (~numberWord << 59) >> 63;
|
||||
long designMask = ~(signed & 0xFF);
|
||||
// Align the number to a specific position and transform the ascii code
|
||||
// to actual digit value in each byte
|
||||
long digits = ((numberWord & designMask) << shift) & 0x0F000F0F00L;
|
||||
|
||||
// Now digits is in the form 0xUU00TTHH00 (UU: units digit, TT: tens digit, HH: hundreds digit)
|
||||
// 0xUU00TTHH00 * (100 * 0x1000000 + 10 * 0x10000 + 1) =
|
||||
// 0x000000UU00TTHH00 +
|
||||
// 0x00UU00TTHH000000 * 10 +
|
||||
// 0xUU00TTHH00000000 * 100
|
||||
// Now TT * 100 has 2 trailing zeroes and HH * 100 + TT * 10 + UU < 0x400
|
||||
// This results in our value lies in the bit 32 to 41 of this product
|
||||
// That was close :)
|
||||
long absValue = ((digits * 0x640a0001) >>> 32) & 0x3FF;
|
||||
long value = (absValue ^ signed) - signed;
|
||||
return (int) value;
|
||||
}
|
||||
|
||||
private static long[] masks = new long[]{ 0x0000000000000000, 0xFF00000000000000L, 0xFFFF000000000000L,
|
||||
0xFFFFFF0000000000L, 0xFFFFFFFF00000000L, 0xFFFFFFFFFF000000L, 0xFFFFFFFFFF0000L, 0xFFFFFFFFFFFF00L };
|
||||
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
long start = System.currentTimeMillis();
|
||||
MMapReader reader = new MMapReader(Paths.get(FILE), new ProcessorPartitionCalculator(), false);
|
||||
Map<String, MeasurementAggregator> measurements = reader.elaborate();
|
||||
// System.out.println("ela=" + (System.currentTimeMillis() - start));
|
||||
System.out.println(measurements);
|
||||
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user