Initial submission (#588)
* Initial submission * fixed not executable scripts
This commit is contained in:
parent
22c188b148
commit
769884426b
20
calculate_average_rcasteltrione.sh
Executable file
20
calculate_average_rcasteltrione.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
JAVA_OPTS="--enable-preview"
|
||||
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_rcasteltrione
|
19
prepare_rcasteltrione.sh
Executable file
19
prepare_rcasteltrione.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||
sdk use java 21.0.1-graal 1>&2
|
@ -0,0 +1,309 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.time.Duration;
|
||||
import java.time.Instant;
|
||||
import java.util.*;
|
||||
|
||||
import static java.util.stream.Collectors.toMap;
|
||||
|
||||
//baseline: 266s
|
||||
|
||||
public class CalculateAverage_rcasteltrione {
|
||||
private static final String FILE = "./measurements.txt";
|
||||
// private static final String FILE = "./backup/measurements.txt";
|
||||
|
||||
public static void main(String[] args) throws IOException, InterruptedException {
|
||||
Path path = Paths.get(FILE);
|
||||
Instant start = Instant.now();
|
||||
|
||||
var segList = FileSegment.forFile(path, Runtime.getRuntime().availableProcessors());
|
||||
var results = new ByteArrayToMeasurementMap[segList.size()];
|
||||
var threads = new Thread[segList.size()];
|
||||
try (var channel = FileChannel.open(path, StandardOpenOption.READ)) {
|
||||
for (int i = 0; i < segList.size(); i++) {
|
||||
int finalI = i;
|
||||
FileSegment fileSegment = segList.get(finalI);
|
||||
var t = Thread.ofPlatform().start(() -> results[finalI] = processSegment(channel, fileSegment));
|
||||
threads[i] = t;
|
||||
}
|
||||
for (Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
}
|
||||
|
||||
Map<String, Measurement> aggregatedMap = Arrays.stream(results)
|
||||
.flatMap(m -> m.entries().stream())
|
||||
.collect(toMap(
|
||||
ByteArrayToMeasurementMap.Entry::key,
|
||||
ByteArrayToMeasurementMap.Entry::value,
|
||||
Measurement::merge,
|
||||
TreeMap::new));
|
||||
|
||||
System.out.println(aggregatedMap);
|
||||
// System.out.println(Duration.between(start, Instant.now()).toMillis());
|
||||
}
|
||||
|
||||
private static ByteArrayToMeasurementMap processSegment(FileChannel channel, FileSegment seg) {
|
||||
try {
|
||||
MappedByteBuffer mbb = channel.map(FileChannel.MapMode.READ_ONLY, seg.start(), seg.size());
|
||||
byte b;
|
||||
var result = new ByteArrayToMeasurementMap();
|
||||
var lineBuffer = new byte[1 << 13];
|
||||
var segmentPosition = mbb.position();
|
||||
var limit = mbb.limit();
|
||||
var lastLineOffset = 0;
|
||||
|
||||
while (segmentPosition < mbb.limit()) {
|
||||
|
||||
int remaining = limit - segmentPosition;
|
||||
int chunk = Math.min(remaining, lineBuffer.length);
|
||||
mbb.get(segmentPosition, lineBuffer, 0, chunk);
|
||||
for (int i = chunk - 1; i >= 0; i--) {
|
||||
if (lineBuffer[i] == '\n') {
|
||||
lastLineOffset = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (int lineBufferOffset = 0; lineBufferOffset < lastLineOffset;) {
|
||||
int nameHash = 0;
|
||||
int nameLength = 0;
|
||||
int nameStart = lineBufferOffset;
|
||||
while ((b = lineBuffer[lineBufferOffset++]) != ';') {
|
||||
nameHash = 31 * nameHash + b;
|
||||
nameLength++;
|
||||
}
|
||||
|
||||
int temp;
|
||||
int negative = 1;
|
||||
// var s = new String(Arrays.copyOfRange(lineBuffer, nameStart, lineOffset - 1), StandardCharsets.UTF_8);
|
||||
if (lineBuffer[lineBufferOffset] == '-') {
|
||||
lineBufferOffset++;
|
||||
negative = -1;
|
||||
}
|
||||
|
||||
// Temperature value: non-null double between -99.9 (inclusive) and 99.9 (inclusive), always with one fractional digit
|
||||
if (lineBuffer[lineBufferOffset + 1] == '.') {
|
||||
temp = (lineBuffer[lineBufferOffset] - '0') * 10 + (lineBuffer[lineBufferOffset + 2] - '0');
|
||||
lineBufferOffset += 3;
|
||||
}
|
||||
else {
|
||||
temp = (lineBuffer[lineBufferOffset] - '0') * 100
|
||||
+ (lineBuffer[lineBufferOffset + 1] - '0') * 10
|
||||
+ (lineBuffer[lineBufferOffset + 3] - '0');
|
||||
lineBufferOffset += 4;
|
||||
}
|
||||
if (lineBuffer[lineBufferOffset] == '\r') {
|
||||
lineBufferOffset++;
|
||||
}
|
||||
lineBufferOffset++;
|
||||
|
||||
temp *= negative;
|
||||
result.mergeOrCreate(lineBuffer, nameStart, nameLength, nameHash, temp);
|
||||
// segmentPosition += lineOffset;
|
||||
// i += lineoffset;
|
||||
}
|
||||
|
||||
segmentPosition += lastLineOffset + 1;
|
||||
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
record FileSegment(long start, long size) {
|
||||
public static List<FileSegment> forFile(Path file, int desiredSegmentsCount) throws IOException {
|
||||
try (var raf = new RandomAccessFile(file.toFile(), "r")) {
|
||||
var segments = new ArrayList<FileSegment>();
|
||||
var fileSize = raf.length();
|
||||
if (fileSize < 1000000) {
|
||||
return Collections.singletonList(new FileSegment(0, fileSize));
|
||||
}
|
||||
var segmentSize = fileSize / desiredSegmentsCount;
|
||||
for (int segmentIdx = 0; segmentIdx < desiredSegmentsCount; segmentIdx++) {
|
||||
var segStart = segmentIdx * segmentSize;
|
||||
var segEnd = (segmentIdx == desiredSegmentsCount - 1) ? fileSize : segStart + segmentSize;
|
||||
segStart = findSegmentBoundary(raf, segmentIdx, 0, segStart, segEnd);
|
||||
segEnd = findSegmentBoundary(raf, segmentIdx, desiredSegmentsCount - 1, segEnd, fileSize);
|
||||
|
||||
var segSize = segEnd - segStart;
|
||||
|
||||
segments.add(new FileSegment(segStart, segSize));
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
}
|
||||
|
||||
private static long findSegmentBoundary(RandomAccessFile raf, int i, int skipForSegment, long location, long fileSize) throws IOException {
|
||||
if (i == skipForSegment) return location;
|
||||
|
||||
raf.seek(location);
|
||||
while (location < fileSize) {
|
||||
location++;
|
||||
if (raf.read() == '\n') break;
|
||||
}
|
||||
return location;
|
||||
}
|
||||
}
|
||||
|
||||
static class Measurement {
|
||||
int min, max, n;
|
||||
long sum;
|
||||
|
||||
private Measurement(int min, int max, long sum, int n) {
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
this.sum = sum;
|
||||
this.n = n;
|
||||
}
|
||||
|
||||
public Measurement(int temp) {
|
||||
this(temp, temp, temp, 1);
|
||||
}
|
||||
|
||||
final Measurement merge(Measurement other) {
|
||||
this.min = Math.min(other.min, this.min);
|
||||
this.max = Math.max(other.max, this.max);
|
||||
this.sum += other.sum;
|
||||
this.n += other.n;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return STR."\{round(min)}/\{round(((double) sum / n))}/\{round(max)}";
|
||||
}
|
||||
|
||||
double round(double v) {
|
||||
return Math.round(v) / 10.0;
|
||||
}
|
||||
}
|
||||
|
||||
static class ByteArrayToMeasurementMap {
|
||||
|
||||
public static final int DEFAULT_CAPACITY = 1024;
|
||||
public static final float LOAD_FACTOR = 0.75f;
|
||||
MeasurementSlot[] slots = new MeasurementSlot[DEFAULT_CAPACITY];
|
||||
int threshold = (int) (DEFAULT_CAPACITY * LOAD_FACTOR);
|
||||
int size = 0;
|
||||
|
||||
private record MeasurementSlot(int hash, byte[] key, String city, Measurement measurement) {
|
||||
}
|
||||
|
||||
public final void mergeOrCreate(byte[] line, int nameStart, int nameLength, int hash, int temperature) {
|
||||
int hashMask = slots.length - 1;
|
||||
|
||||
for (int idx = hash & hashMask;; idx = (idx + 1) & hashMask) {
|
||||
MeasurementSlot slot = slots[idx];
|
||||
if (slot == null) {
|
||||
size++;
|
||||
if (size > threshold) {
|
||||
idx = resize(hash);
|
||||
}
|
||||
byte[] nameBuffer = new byte[nameLength];
|
||||
System.arraycopy(line, nameStart, nameBuffer, 0, nameLength);
|
||||
slots[idx] = new MeasurementSlot(
|
||||
hash,
|
||||
nameBuffer,
|
||||
new String(nameBuffer, StandardCharsets.UTF_8),
|
||||
new Measurement(temperature));
|
||||
return;
|
||||
}
|
||||
|
||||
if (slot.hash == hash && arrayEquals(slot.key, line, nameStart, nameLength)) {
|
||||
Measurement value = slots[idx].measurement;
|
||||
value.min = Math.min(value.min, temperature);
|
||||
value.max = Math.max(value.max, temperature);
|
||||
value.sum += temperature;
|
||||
value.n++;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int resize(int hash) {
|
||||
var oldSlots = slots;
|
||||
var newSlots = new MeasurementSlot[oldSlots.length << 1];
|
||||
var mask = newSlots.length - 1;
|
||||
for (MeasurementSlot oldSlot : oldSlots) {
|
||||
if (oldSlot == null) {
|
||||
continue;
|
||||
}
|
||||
int idx = oldSlot.hash & mask;
|
||||
while (newSlots[idx] != null) {
|
||||
idx = (idx + 1) & mask;
|
||||
}
|
||||
newSlots[idx] = oldSlot;
|
||||
}
|
||||
|
||||
slots = newSlots;
|
||||
threshold = (int) (newSlots.length * LOAD_FACTOR);
|
||||
int hashMask = slots.length - 1;
|
||||
int idx;
|
||||
for (idx = hash & hashMask; slots[idx] != null; idx = (idx + 1) & hashMask) {
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
private boolean arrayEquals(byte[] storedKey, byte[] line, int nameStart, int nameLength) {
|
||||
if (storedKey.length != nameLength) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < storedKey.length; i++) {
|
||||
if (storedKey[i] != line[nameStart + i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
private static int hashCode(int h) {
|
||||
h ^= (h >>> 20) ^ (h >>> 12);
|
||||
h ^= (h >>> 7) ^ (h >>> 4);
|
||||
h += h << 7;
|
||||
return h;
|
||||
}
|
||||
|
||||
public final List<Entry> entries() {
|
||||
var result = new ArrayList<Entry>(slots.length);
|
||||
for (MeasurementSlot slot : slots) {
|
||||
if (slot != null) {
|
||||
result.add(new Entry(slot.city, slot.measurement));
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public record Entry(String key, Measurement value) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user