dpsoft: first submission (#572)
* dpsoft: first submission * minor clean up * map with linear probing * clean up * update prepare * clean up * remove string format * add credits * fix format * use prepare.sh * graal 21.0.2 * fix differences * clean up * underflow protection * improve segments generation logic * clean up * remove unnecessary alignment in findsegment * new try * fix number of segments
This commit is contained in:
parent
2aed039f17
commit
bec0cef2d3
20
calculate_average_dpsoft.sh
Executable file
20
calculate_average_dpsoft.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
JAVA_OPTS="--enable-preview -XX:+UnlockExperimentalVMOptions -XX:-EnableJVMCI -XX:+UseEpsilonGC -Xms128m -Xmx128m -XX:+AlwaysPreTouch -XX:+UseTransparentHugePages -XX:-TieredCompilation -XX:+TrustFinalNonStaticFields"
|
||||
|
||||
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_dpsoft
|
20
prepare_dpsoft.sh
Executable file
20
prepare_dpsoft.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Uncomment below to use sdk
|
||||
source "$HOME/.sdkman/bin/sdkman-init.sh"
|
||||
sdk use java 21.0.2-graal 1>&2
|
324
src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java
Normal file
324
src/main/java/dev/morling/onebrc/CalculateAverage_dpsoft.java
Normal file
@ -0,0 +1,324 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.Phaser;
|
||||
|
||||
public class CalculateAverage_dpsoft {
|
||||
private static final String FILE = "./measurements.txt";
|
||||
private static final int MAX_ROWS = 1 << 15;
|
||||
private static final int ROWS_MASK = MAX_ROWS - 1;
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
final var cpus = Runtime.getRuntime().availableProcessors();
|
||||
final var segments = getMemorySegments(cpus);
|
||||
final var tasks = new MeasurementExtractor[segments.size()];
|
||||
final var phaser = new Phaser(segments.size());
|
||||
|
||||
for (int i = 0; i < segments.size(); i++) {
|
||||
tasks[i] = new MeasurementExtractor(segments.get(i), phaser);
|
||||
}
|
||||
|
||||
phaser.awaitAdvance(phaser.getPhase());
|
||||
|
||||
final var allMeasurements = Arrays.stream(tasks)
|
||||
.parallel()
|
||||
.map(MeasurementExtractor::getMeasurements)
|
||||
.reduce(MeasurementMap::merge)
|
||||
.orElseThrow();
|
||||
|
||||
System.out.println(sortSequentially(allMeasurements));
|
||||
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
private static Map<String, Measurement> sortSequentially(MeasurementMap allMeasurements) {
|
||||
final Map<String, Measurement> sorted = new TreeMap<>();
|
||||
for (Measurement m : allMeasurements.measurements) {
|
||||
if (m != null) {
|
||||
sorted.put(new String(m.name, StandardCharsets.UTF_8), m);
|
||||
}
|
||||
}
|
||||
return sorted;
|
||||
}
|
||||
|
||||
// Inspired by @spullara
|
||||
private static List<FileSegment> getMemorySegments(int numberOfSegments) throws IOException {
|
||||
var file = new File(FILE);
|
||||
long fileSize = file.length();
|
||||
long segmentSize = fileSize / numberOfSegments;
|
||||
List<FileSegment> segments = new ArrayList<>(numberOfSegments);
|
||||
|
||||
if (fileSize < 1_000_000) {
|
||||
segments.add(new FileSegment(0, fileSize));
|
||||
return segments;
|
||||
}
|
||||
|
||||
while (segmentSize >= Integer.MAX_VALUE) {
|
||||
numberOfSegments += 1;
|
||||
segmentSize = fileSize / numberOfSegments;
|
||||
}
|
||||
|
||||
try (RandomAccessFile randomAccessFile = new RandomAccessFile(file, "r")) {
|
||||
for (int i = 0; i < numberOfSegments; i++) {
|
||||
long segStart = i * segmentSize;
|
||||
long segEnd = (i == numberOfSegments - 1) ? fileSize : segStart + segmentSize;
|
||||
segStart = findSegment(i, 0, randomAccessFile, segStart, segEnd);
|
||||
segEnd = findSegment(i, numberOfSegments - 1, randomAccessFile, segEnd, fileSize);
|
||||
|
||||
segments.add(new FileSegment(segStart, segEnd));
|
||||
}
|
||||
}
|
||||
return segments;
|
||||
}
|
||||
|
||||
private static long findSegment(int i, int skipSegment, RandomAccessFile raf, long location, long fileSize) throws IOException {
|
||||
if (i != skipSegment) {
|
||||
raf.seek(location);
|
||||
while (location < fileSize) {
|
||||
location++;
|
||||
if (raf.read() == '\n')
|
||||
break;
|
||||
}
|
||||
}
|
||||
return location;
|
||||
}
|
||||
|
||||
record FileSegment(long start, long end) {
|
||||
}
|
||||
|
||||
static final class MeasurementExtractor implements Runnable {
|
||||
private final FileSegment segment;
|
||||
private final Phaser phaser;
|
||||
private final MeasurementMap measurements = new MeasurementMap();
|
||||
|
||||
MeasurementExtractor(FileSegment memorySegment, Phaser phaser) {
|
||||
this.segment = memorySegment;
|
||||
this.phaser = phaser;
|
||||
(new Thread(this)).start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
long segmentEnd = segment.end();
|
||||
try (var fileChannel = FileChannel.open(Path.of(FILE), StandardOpenOption.READ)) {
|
||||
var mbb = fileChannel.map(FileChannel.MapMode.READ_ONLY, segment.start(), segmentEnd - segment.start());
|
||||
mbb.order(ByteOrder.nativeOrder());
|
||||
|
||||
if (segment.start() > 0) {
|
||||
skipToFirstLine(mbb);
|
||||
}
|
||||
|
||||
while (mbb.remaining() > 0 && mbb.position() <= segmentEnd) {
|
||||
int pos = mbb.position();
|
||||
int nameHash = hashAndRewind(mbb);
|
||||
var m = measurements.getOrCompute(nameHash, mbb, pos);
|
||||
int temp = readTemperatureFromBuffer(mbb);
|
||||
|
||||
m.sample(temp);
|
||||
}
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException("Error reading file", e);
|
||||
}
|
||||
finally {
|
||||
phaser.arriveAndAwaitAdvance();
|
||||
}
|
||||
}
|
||||
|
||||
// inspired by @lawrey
|
||||
private static int hashAndRewind(MappedByteBuffer mbb) {
|
||||
int hash = 0;
|
||||
int idx = mbb.position();
|
||||
outer: while (true) {
|
||||
int name = mbb.getInt();
|
||||
for (int c = 0; c < 4; c++) {
|
||||
int b = (name >> (c << 3)) & 0xFF;
|
||||
if (b == ';') {
|
||||
idx += c + 1;
|
||||
break outer;
|
||||
}
|
||||
hash ^= b * 82805;
|
||||
}
|
||||
idx += 4;
|
||||
}
|
||||
|
||||
var rewind = mbb.position() - idx;
|
||||
mbb.position(mbb.position() - rewind);
|
||||
return hash;
|
||||
}
|
||||
|
||||
private static int readTemperatureFromBuffer(MappedByteBuffer mbb) {
|
||||
int temp = 0;
|
||||
boolean negative = false;
|
||||
|
||||
outer: while (mbb.remaining() > 0) {
|
||||
int b = mbb.get();
|
||||
switch (b) {
|
||||
case '-':
|
||||
negative = true;
|
||||
break;
|
||||
default:
|
||||
temp = 10 * temp + (b - '0');
|
||||
break;
|
||||
case '.':
|
||||
b = mbb.get();
|
||||
temp = 10 * temp + (b - '0');
|
||||
case '\r':
|
||||
mbb.get();
|
||||
case '\n':
|
||||
break outer;
|
||||
}
|
||||
}
|
||||
if (negative)
|
||||
temp = -temp;
|
||||
return temp;
|
||||
}
|
||||
|
||||
public MeasurementMap getMeasurements() {
|
||||
return measurements;
|
||||
}
|
||||
|
||||
// Skips to the first line in the buffer, used for chunk processing.
|
||||
private static void skipToFirstLine(MappedByteBuffer mbb) {
|
||||
while ((mbb.get() & 0xFF) >= ' ') {
|
||||
// Skip bytes until reaching the start of a line.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// credits to @shipilev
|
||||
static class MeasurementMap {
|
||||
private final Measurement[] measurements = new Measurement[MAX_ROWS];
|
||||
|
||||
public Measurement getOrCompute(int hash, MappedByteBuffer mbb, int position) {
|
||||
int index = hash & ROWS_MASK;
|
||||
var measurement = measurements[index];
|
||||
if (measurement != null && hash == measurement.nameHash && Measurement.equalsTo(measurement.name, mbb, position)) {
|
||||
return measurement;
|
||||
}
|
||||
else {
|
||||
return compute(hash, mbb, position);
|
||||
}
|
||||
}
|
||||
|
||||
private Measurement compute(int hash, MappedByteBuffer mbb, int position) {
|
||||
var index = hash & ROWS_MASK;
|
||||
Measurement m;
|
||||
|
||||
while (true) {
|
||||
m = measurements[index];
|
||||
if (m == null || (hash == m.nameHash && Measurement.equalsTo(m.name, mbb, position))) {
|
||||
break;
|
||||
}
|
||||
index = (index + 1) & ROWS_MASK;
|
||||
}
|
||||
|
||||
if (m == null) {
|
||||
int len = mbb.position() - position - 1;
|
||||
byte[] bytes = new byte[len];
|
||||
mbb.position(position);
|
||||
mbb.get(bytes, 0, len);
|
||||
mbb.get();
|
||||
measurements[index] = m = new Measurement(bytes, hash);
|
||||
}
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
public MeasurementMap merge(MeasurementMap otherMap) {
|
||||
for (Measurement other : otherMap.measurements) {
|
||||
if (other == null)
|
||||
continue;
|
||||
int index = other.nameHash & ROWS_MASK;
|
||||
while (true) {
|
||||
Measurement m = measurements[index];
|
||||
if (m == null) {
|
||||
measurements[index] = other;
|
||||
break;
|
||||
}
|
||||
else if (Arrays.equals(m.name, other.name)) {
|
||||
m.merge(other);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
index = (index + 1) & ROWS_MASK;
|
||||
}
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
static final class Measurement {
|
||||
public final int nameHash;
|
||||
public final byte[] name;
|
||||
|
||||
public long sum;
|
||||
public int count = 0;
|
||||
public int min = Integer.MAX_VALUE;
|
||||
public int max = Integer.MIN_VALUE;
|
||||
|
||||
public Measurement(byte[] name, int nameHash) {
|
||||
this.name = name;
|
||||
this.nameHash = nameHash;
|
||||
}
|
||||
|
||||
public static boolean equalsTo(byte[] name, MappedByteBuffer mbb, int position) {
|
||||
int len = mbb.position() - position - 1;
|
||||
if (len != name.length)
|
||||
return false;
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (name[i] != mbb.get(position + i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public void sample(int temp) {
|
||||
min = Math.min(min, temp);
|
||||
max = Math.max(max, temp);
|
||||
sum += temp;
|
||||
count++;
|
||||
}
|
||||
|
||||
public Measurement merge(Measurement m2) {
|
||||
min = Math.min(min, m2.min);
|
||||
max = Math.max(max, m2.max);
|
||||
sum += m2.sum;
|
||||
count += m2.count;
|
||||
return this;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return round(((double) min) / 10.0) + "/" + round((((double) sum) / 10.0) / count) + "/" + round(((double) max) / 10.0);
|
||||
}
|
||||
|
||||
private static double round(double value) {
|
||||
return Math.round(value * 10.0) / 10.0;
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user