Initial implementation (#158)
* Initial implementation * Make executable * Deal with collisions (but very slow) * Formatting * Remove superfluous time * Formatting * Fixes * Remove hard coded correction line * Integer parsing speedup
This commit is contained in:
parent
d4fdcc5408
commit
1589210038
20
calculate_average_berry120.sh
Executable file
20
calculate_average_berry120.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
#sdk use java 21.0.1-amzn
|
||||
JAVA_OPTS="-Xlog:gc=error --enable-preview --add-modules=jdk.incubator.vector"
|
||||
time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_berry120
|
268
src/main/java/dev/morling/onebrc/CalculateAverage_berry120.java
Normal file
268
src/main/java/dev/morling/onebrc/CalculateAverage_berry120.java
Normal file
@ -0,0 +1,268 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.RandomAccessFile;
|
||||
import java.lang.foreign.Arena;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.TreeSet;
|
||||
|
||||
public class CalculateAverage_berry120 {
|
||||
|
||||
private static final String FILE = "./measurements.txt";
|
||||
// TODO: Tweak this number?
|
||||
public static final int NUM_VIRTUAL_THREADS = 1000;
|
||||
public static final boolean DEBUG = false;
|
||||
|
||||
static class TemperatureSummary implements Comparable<TemperatureSummary> {
|
||||
byte[] name;
|
||||
int min;
|
||||
int max;
|
||||
int total;
|
||||
int sampleCount;
|
||||
|
||||
public TemperatureSummary(byte[] name, int min, int max, int total, int sampleCount) {
|
||||
this.name = name;
|
||||
this.min = min;
|
||||
this.max = max;
|
||||
this.total = total;
|
||||
this.sampleCount = sampleCount;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(TemperatureSummary o) {
|
||||
return new String(name).compareTo(new String(o.name));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TemperatureSummary{" +
|
||||
"name=" + new String(name) +
|
||||
", min=" + min +
|
||||
", max=" + max +
|
||||
", total=" + total +
|
||||
", sampleCount=" + sampleCount +
|
||||
'}';
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
long time = System.currentTimeMillis();
|
||||
|
||||
Path path = Path.of(FILE);
|
||||
RandomAccessFile file = new RandomAccessFile(path.toFile(), "r");
|
||||
FileChannel channel = file.getChannel();
|
||||
long size = Files.size(path);
|
||||
int splitSize = size < 10_000_000 ? 1 : (NUM_VIRTUAL_THREADS - 1);
|
||||
long inc = (int) (size / splitSize);
|
||||
|
||||
List<Long> positions = new ArrayList<>();
|
||||
positions.add(0L);
|
||||
|
||||
MemorySegment segment = channel.map(FileChannel.MapMode.READ_ONLY, 0, Files.size(path), Arena.ofShared());
|
||||
|
||||
long pos = 0;
|
||||
for (int i = 0; i < splitSize; i++) {
|
||||
long endPos = pos + inc - 1;
|
||||
while (segment.get(ValueLayout.JAVA_BYTE, endPos) != '\n') {
|
||||
endPos--;
|
||||
}
|
||||
pos = endPos + 1;
|
||||
positions.add(pos);
|
||||
}
|
||||
positions.add(size);
|
||||
|
||||
if (DEBUG)
|
||||
System.out.println("WORKED OUT SPLITS: " + (System.currentTimeMillis() - time));
|
||||
|
||||
List<Thread> threads = new ArrayList<>(NUM_VIRTUAL_THREADS);
|
||||
|
||||
List<Map<?, TemperatureSummary>> maps = Collections.synchronizedList(new ArrayList<>());
|
||||
|
||||
for (int split = 0; split < positions.size() - 1; split++) {
|
||||
|
||||
long position = positions.get(split);
|
||||
long positionEnd = positions.get(split + 1);
|
||||
|
||||
threads.add(Thread.ofVirtual().start(() -> {
|
||||
|
||||
// TODO: Custom faster map?
|
||||
Map<Integer, TemperatureSummary> map = new HashMap<>();
|
||||
maps.add(map);
|
||||
|
||||
// Care much less about this map, only used if collisions in the first
|
||||
Map<String, TemperatureSummary> backupMap = new HashMap<>();
|
||||
maps.add(backupMap);
|
||||
|
||||
boolean processingPlaceName = true;
|
||||
|
||||
byte[] placeName = new byte[100];
|
||||
int placeNameIdx = 0;
|
||||
|
||||
byte[] digits = new byte[100];
|
||||
int digitIdx = 0;
|
||||
|
||||
for (long address = position; address < positionEnd; address++) {
|
||||
byte b = segment.get(ValueLayout.JAVA_BYTE, address);
|
||||
|
||||
if (b == 10) {
|
||||
int rollingHash = 5381;
|
||||
for (int i = 0; i < placeNameIdx; i++) {
|
||||
rollingHash = (((rollingHash << 5) + rollingHash) + placeName[i]) & 0xFFFFF;
|
||||
}
|
||||
|
||||
var existingTemperatureSummary = map.get(rollingHash);
|
||||
int num = parse(digits, digitIdx - 1);
|
||||
|
||||
if (existingTemperatureSummary == null) {
|
||||
byte[] thisPlace = new byte[placeNameIdx];
|
||||
System.arraycopy(placeName, 0, thisPlace, 0, placeNameIdx);
|
||||
map.put(rollingHash, new TemperatureSummary(thisPlace, num, num, num, 1));
|
||||
}
|
||||
else if (!Arrays.equals(placeName, 0, placeNameIdx, existingTemperatureSummary.name, 0, existingTemperatureSummary.name.length)) {
|
||||
|
||||
/*
|
||||
* This block will be slow - don't really care, should be very rare
|
||||
*/
|
||||
if (DEBUG)
|
||||
System.out.println("BAD: COLLISION!");
|
||||
byte[] thisPlace = new byte[placeNameIdx];
|
||||
System.arraycopy(placeName, 0, thisPlace, 0, placeNameIdx);
|
||||
String backupKey = new String(thisPlace);
|
||||
var backupExistingTemperatureSummary = backupMap.get(backupKey);
|
||||
|
||||
if (backupExistingTemperatureSummary == null) {
|
||||
backupMap.put(backupKey, new TemperatureSummary(thisPlace, num, num, num, 1));
|
||||
}
|
||||
else {
|
||||
backupExistingTemperatureSummary.max = (Math.max(num, backupExistingTemperatureSummary.max));
|
||||
backupExistingTemperatureSummary.min = (Math.min(num, backupExistingTemperatureSummary.min));
|
||||
backupExistingTemperatureSummary.total += num;
|
||||
backupExistingTemperatureSummary.sampleCount++;
|
||||
}
|
||||
/*
|
||||
* End slow block
|
||||
*/
|
||||
}
|
||||
else {
|
||||
|
||||
existingTemperatureSummary.max = (Math.max(num, existingTemperatureSummary.max));
|
||||
existingTemperatureSummary.min = (Math.min(num, existingTemperatureSummary.min));
|
||||
existingTemperatureSummary.total += num;
|
||||
existingTemperatureSummary.sampleCount++;
|
||||
}
|
||||
|
||||
processingPlaceName = true;
|
||||
placeNameIdx = 0;
|
||||
digitIdx = 0;
|
||||
}
|
||||
else if (b == ';') {
|
||||
processingPlaceName = false;
|
||||
}
|
||||
else if (processingPlaceName) {
|
||||
placeName[placeNameIdx++] = b;
|
||||
}
|
||||
else {
|
||||
digits[digitIdx++] = b;
|
||||
}
|
||||
}
|
||||
}));
|
||||
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
System.out.println("STARTED THREADS: " + (System.currentTimeMillis() - time));
|
||||
}
|
||||
|
||||
for (Thread thread : threads) {
|
||||
thread.join();
|
||||
}
|
||||
|
||||
TreeMap<String, TemperatureSummary> mergedMap = new TreeMap<>();
|
||||
|
||||
for (var map : maps) {
|
||||
for (TemperatureSummary t1 : map.values()) {
|
||||
if (t1 == null)
|
||||
continue;
|
||||
|
||||
var t2 = mergedMap.get(new String(t1.name));
|
||||
|
||||
if (t2 == null) {
|
||||
mergedMap.put(new String(t1.name), t1);
|
||||
}
|
||||
else {
|
||||
var merged = new TemperatureSummary(t1.name, Math.min(t1.min, t2.min), Math.max(t1.max, t2.max), t1.total + t2.total,
|
||||
t1.sampleCount + t2.sampleCount);
|
||||
mergedMap.put(new String(t1.name), merged);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
boolean first = true;
|
||||
StringBuilder output = new StringBuilder(16_000);
|
||||
output.append("{");
|
||||
for (var value : new TreeSet<>(mergedMap.values())) {
|
||||
if (first) {
|
||||
first = false;
|
||||
}
|
||||
else {
|
||||
output.append(", ");
|
||||
}
|
||||
output.append(new String(value.name)).append("=").append((double) value.min / 10).append("/")
|
||||
.append(String.format("%.1f", ((double) value.total / value.sampleCount / 10))).append("/").append((double) value.max / 10);
|
||||
}
|
||||
output.append("}");
|
||||
|
||||
System.out.println(output);
|
||||
// if (DEBUG)
|
||||
// System.out.println("CORRECT: " + output.toString().equals(CORRECT));
|
||||
|
||||
if (DEBUG)
|
||||
System.out.println("TOTAL TIME: " + (System.currentTimeMillis() - time));
|
||||
|
||||
}
|
||||
|
||||
private static int parse(byte[] arr, int len) {
|
||||
// TODO: SIMD?
|
||||
int num = 0;
|
||||
for (int mI = len, m = 1; mI >= 0; mI--) {
|
||||
byte d = arr[mI];
|
||||
if (d == '.') {
|
||||
}
|
||||
else if (d == '-') {
|
||||
num = -num;
|
||||
m *= 10;
|
||||
}
|
||||
else {
|
||||
num += (d & 0xF) * m;
|
||||
m *= 10;
|
||||
}
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user