2024-01-03 10:14:19 +01:00
|
|
|
/*
|
|
|
|
* Copyright 2023 The original authors
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
package dev.morling.onebrc;
|
|
|
|
|
2024-01-03 14:53:38 +01:00
|
|
|
import java.io.*;
|
2024-01-03 20:22:39 +01:00
|
|
|
import java.lang.foreign.Arena;
|
|
|
|
import java.lang.foreign.MemorySegment;
|
|
|
|
import java.lang.foreign.ValueLayout;
|
2024-01-03 19:14:15 +01:00
|
|
|
import java.nio.channels.FileChannel;
|
2024-01-03 14:53:38 +01:00
|
|
|
import java.nio.charset.StandardCharsets;
|
2024-01-03 10:14:19 +01:00
|
|
|
import java.nio.file.Files;
|
|
|
|
import java.nio.file.Path;
|
2024-01-03 19:14:15 +01:00
|
|
|
import java.nio.file.StandardOpenOption;
|
2024-01-03 10:14:19 +01:00
|
|
|
import java.time.Duration;
|
|
|
|
import java.time.Instant;
|
2024-01-03 19:14:15 +01:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
2024-01-03 10:14:19 +01:00
|
|
|
import java.util.TreeMap;
|
|
|
|
|
|
|
|
// gunnar morling - 2:10
|
|
|
|
// roy van rijn - 1:01
|
2024-01-03 14:53:38 +01:00
|
|
|
// 0:37
|
2024-01-03 10:14:19 +01:00
|
|
|
|
|
|
|
public class CalculateAverage_ddimtirov {
|
2024-01-03 19:14:15 +01:00
|
|
|
private static final String FILE = "./measurements.txt";
|
2024-01-03 10:14:19 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
private static final int HASH_NO_CLASH_MODULUS = 49999;
|
|
|
|
private static final int OFFSET_MIN = 0;
|
|
|
|
private static final int OFFSET_MAX = 1;
|
|
|
|
private static final int OFFSET_COUNT = 2;
|
2024-01-03 10:14:19 +01:00
|
|
|
|
2024-01-03 14:53:38 +01:00
|
|
|
@SuppressWarnings("RedundantSuppression")
|
|
|
|
public static void main(String[] args) throws IOException {
|
2024-01-03 19:14:15 +01:00
|
|
|
var path = Path.of(FILE);
|
|
|
|
var start = Instant.now();
|
|
|
|
var desiredSegmentsCount = Runtime.getRuntime().availableProcessors();
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
var fileSegments = FileSegment.forFile(path, desiredSegmentsCount);
|
2024-01-03 19:14:15 +01:00
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
var trackers = fileSegments.stream().parallel().map(fileSegment -> {
|
2024-01-03 19:14:15 +01:00
|
|
|
try (var fileChannel = (FileChannel) Files.newByteChannel(path, StandardOpenOption.READ)) {
|
|
|
|
var tracker = new Tracker();
|
2024-01-03 20:22:39 +01:00
|
|
|
var memorySegment = fileChannel.map(FileChannel.MapMode.READ_ONLY, fileSegment.start(), fileSegment.size(), Arena.ofConfined());
|
|
|
|
tracker.processSegment(memorySegment);
|
2024-01-03 19:14:15 +01:00
|
|
|
return tracker;
|
2024-01-03 20:22:39 +01:00
|
|
|
}
|
|
|
|
catch (IOException e) {
|
2024-01-03 19:14:15 +01:00
|
|
|
throw new RuntimeException(e);
|
2024-01-03 10:14:19 +01:00
|
|
|
}
|
2024-01-03 19:14:15 +01:00
|
|
|
}).toList();
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
var result = summarizeTrackers(trackers);
|
|
|
|
System.out.println(result);
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
// noinspection ConstantValue
|
|
|
|
if (start != null)
|
|
|
|
System.err.println(Duration.between(start, Instant.now()));
|
|
|
|
assert Files.readAllLines(Path.of("measurements_result.txt")).getFirst().equals(result);
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
record FileSegment(long start, long size) {
|
2024-01-03 19:14:15 +01:00
|
|
|
public static List<FileSegment> forFile(Path file, int desiredSegmentsCount) throws IOException {
|
|
|
|
try (var raf = new RandomAccessFile(file.toFile(), "r")) {
|
2024-01-03 20:22:39 +01:00
|
|
|
var segments = new ArrayList<FileSegment>();
|
|
|
|
var fileSize = raf.length();
|
|
|
|
var segmentSize = fileSize / desiredSegmentsCount;
|
2024-01-03 19:14:15 +01:00
|
|
|
for (int segmentIdx = 0; segmentIdx < desiredSegmentsCount; segmentIdx++) {
|
2024-01-03 20:22:39 +01:00
|
|
|
var segStart = segmentIdx * segmentSize;
|
|
|
|
var segEnd = (segmentIdx == desiredSegmentsCount - 1) ? fileSize : segStart + segmentSize;
|
2024-01-03 19:14:15 +01:00
|
|
|
segStart = findSegmentBoundary(raf, segmentIdx, 0, segStart, segEnd);
|
|
|
|
segEnd = findSegmentBoundary(raf, segmentIdx, desiredSegmentsCount - 1, segEnd, fileSize);
|
2024-01-03 10:14:19 +01:00
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
var segSize = segEnd - segStart;
|
|
|
|
|
|
|
|
segments.add(new FileSegment(segStart, segSize));
|
2024-01-03 19:14:15 +01:00
|
|
|
}
|
|
|
|
return segments;
|
|
|
|
}
|
2024-01-03 10:14:19 +01:00
|
|
|
}
|
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
private static long findSegmentBoundary(RandomAccessFile raf, int i, int skipForSegment, long location, long fileSize) throws IOException {
|
|
|
|
if (i == skipForSegment) return location;
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
raf.seek(location);
|
|
|
|
while (location < fileSize) {
|
|
|
|
location++;
|
|
|
|
if (raf.read() == '\n') break;
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
2024-01-03 19:14:15 +01:00
|
|
|
return location;
|
|
|
|
}
|
|
|
|
}
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
private static String summarizeTrackers(List<Tracker> trackers) {
|
|
|
|
var result = new TreeMap<String, String>();
|
2024-01-03 20:22:39 +01:00
|
|
|
for (var i = 0; i < HASH_NO_CLASH_MODULUS; i++) {
|
2024-01-03 19:14:15 +01:00
|
|
|
String name = null;
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
var min = Integer.MAX_VALUE;
|
|
|
|
var max = Integer.MIN_VALUE;
|
|
|
|
var sum = 0L;
|
|
|
|
var count = 0L;
|
2024-01-03 19:14:15 +01:00
|
|
|
for (Tracker tracker : trackers) {
|
2024-01-03 20:22:39 +01:00
|
|
|
if (tracker.names[i] == null)
|
|
|
|
continue;
|
|
|
|
if (name == null)
|
|
|
|
name = tracker.names[i];
|
|
|
|
|
|
|
|
var minn = tracker.minMaxCount[i * 3];
|
|
|
|
var maxx = tracker.minMaxCount[i * 3 + 1];
|
|
|
|
if (minn < min)
|
|
|
|
min = minn;
|
|
|
|
if (maxx > max)
|
|
|
|
max = maxx;
|
|
|
|
count += tracker.minMaxCount[i * 3 + 2];
|
2024-01-03 19:14:15 +01:00
|
|
|
sum += tracker.sums[i];
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
2024-01-03 20:22:39 +01:00
|
|
|
if (name == null)
|
|
|
|
continue;
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
var mean = Math.round((double) sum / count) / 10.0;
|
2024-01-03 20:22:39 +01:00
|
|
|
result.put(name, (min / 10.0) + "/" + mean + "/" + (max / 10.0));
|
2024-01-03 19:14:15 +01:00
|
|
|
}
|
|
|
|
return result.toString();
|
|
|
|
}
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
static class Tracker {
|
|
|
|
private final int[] minMaxCount = new int[HASH_NO_CLASH_MODULUS * 3];
|
|
|
|
private final long[] sums = new long[HASH_NO_CLASH_MODULUS];
|
|
|
|
private final String[] names = new String[HASH_NO_CLASH_MODULUS];
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
private void processSegment(MemorySegment memory) {
|
|
|
|
int position = 0;
|
|
|
|
long limit = memory.byteSize();
|
|
|
|
while (position < limit) {
|
|
|
|
int pos = position;
|
2024-01-03 19:14:15 +01:00
|
|
|
byte b;
|
|
|
|
|
|
|
|
int nameLength = 0, nameHash = 0;
|
2024-01-03 20:22:39 +01:00
|
|
|
while ((b = memory.get(ValueLayout.JAVA_BYTE, pos++)) != ';') {
|
|
|
|
nameHash = nameHash * 31 + b;
|
2024-01-03 19:14:15 +01:00
|
|
|
nameLength++;
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
2024-01-03 19:14:15 +01:00
|
|
|
|
|
|
|
int temperature = 0, sign = 1;
|
2024-01-03 20:22:39 +01:00
|
|
|
outer: while ((b = memory.get(ValueLayout.JAVA_BYTE, pos++)) != '\n') {
|
2024-01-03 19:14:15 +01:00
|
|
|
switch (b) {
|
2024-01-03 20:22:39 +01:00
|
|
|
case '\r':
|
2024-01-03 19:14:15 +01:00
|
|
|
pos++;
|
|
|
|
break outer;
|
2024-01-03 20:22:39 +01:00
|
|
|
case '.':
|
2024-01-03 19:14:15 +01:00
|
|
|
break;
|
2024-01-03 20:22:39 +01:00
|
|
|
case '-':
|
2024-01-03 19:14:15 +01:00
|
|
|
sign = -1;
|
|
|
|
break;
|
2024-01-03 20:22:39 +01:00
|
|
|
default:
|
2024-01-03 19:14:15 +01:00
|
|
|
var digit = b - '0';
|
|
|
|
assert digit >= 0 && digit <= 9;
|
|
|
|
temperature = 10 * temperature + digit;
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
processLine(nameHash, memory, position, nameLength, temperature * sign);
|
|
|
|
position = pos;
|
2024-01-03 19:14:15 +01:00
|
|
|
}
|
2024-01-03 10:14:19 +01:00
|
|
|
}
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
public void processLine(int nameHash, MemorySegment buffer, int nameOffset, int nameLength, int temperature) {
|
2024-01-03 19:14:15 +01:00
|
|
|
var i = Math.abs(nameHash) % HASH_NO_CLASH_MODULUS;
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
if (names[i] == null) {
|
2024-01-03 19:14:15 +01:00
|
|
|
names[i] = parseName(buffer, nameOffset, nameLength);
|
2024-01-03 20:22:39 +01:00
|
|
|
}
|
|
|
|
else {
|
2024-01-03 19:14:15 +01:00
|
|
|
assert parseName(buffer, nameOffset, nameLength).equals(names[i]) : parseName(buffer, nameOffset, nameLength) + "!=" + names[i];
|
|
|
|
}
|
2024-01-03 14:53:38 +01:00
|
|
|
|
2024-01-03 19:14:15 +01:00
|
|
|
sums[i] += temperature;
|
2024-01-03 14:53:38 +01:00
|
|
|
|
|
|
|
int mmcIndex = i * 3;
|
|
|
|
var min = minMaxCount[mmcIndex + OFFSET_MIN];
|
|
|
|
var max = minMaxCount[mmcIndex + OFFSET_MAX];
|
2024-01-03 20:22:39 +01:00
|
|
|
if (temperature < min)
|
|
|
|
minMaxCount[mmcIndex + OFFSET_MIN] = temperature;
|
|
|
|
if (temperature > max)
|
|
|
|
minMaxCount[mmcIndex + OFFSET_MAX] = temperature;
|
2024-01-03 14:53:38 +01:00
|
|
|
|
|
|
|
minMaxCount[mmcIndex + OFFSET_COUNT]++;
|
2024-01-03 10:14:19 +01:00
|
|
|
}
|
|
|
|
|
2024-01-03 20:22:39 +01:00
|
|
|
private String parseName(MemorySegment memory, int nameOffset, int nameLength) {
|
|
|
|
byte[] array = memory.asSlice(nameOffset, nameLength).toArray(ValueLayout.JAVA_BYTE);
|
|
|
|
return new String(array, StandardCharsets.UTF_8);
|
2024-01-03 14:53:38 +01:00
|
|
|
}
|
|
|
|
}
|
2024-01-03 10:14:19 +01:00
|
|
|
}
|