A SAFE and readable version (#388)
* A SAFE and readable version * Remove unused functions * Making it slower, removing custom hashMap
This commit is contained in:
parent
1fd4712ed3
commit
3c36b5b0a8
19
calculate_average_anitasv.sh
Executable file
19
calculate_average_anitasv.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# JVM flags for this entry; --enable-preview turns on preview language/API features.
JAVA_OPTS="--enable-preview"

# Location of the built project jar and the entry-point class to run.
JAR="target/average-1.0.0-SNAPSHOT.jar"
MAIN_CLASS="dev.morling.onebrc.CalculateAverage_anitasv"

# JAVA_OPTS is intentionally left unquoted so that multiple flags split into separate arguments.
java $JAVA_OPTS --class-path "$JAR" "$MAIN_CLASS"
|
19
prepare_anitasv.sh
Executable file
19
prepare_anitasv.sh
Executable file
@ -0,0 +1,19 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Load SDKMAN's init script into the current shell before invoking `sdk`.
SDKMAN_INIT="${HOME}/.sdkman/bin/sdkman-init.sh"
. "$SDKMAN_INIT"

# Switch this shell to the 21.0.1-graal JDK; sdk's standard output is redirected to stderr.
sdk use java 21.0.1-graal >&2
|
215
src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java
Normal file
215
src/main/java/dev/morling/onebrc/CalculateAverage_anitasv.java
Normal file
@ -0,0 +1,215 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.lang.foreign.Arena;
|
||||
import java.lang.foreign.MemorySegment;
|
||||
import java.lang.foreign.ValueLayout;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.IntStream;
|
||||
|
||||
public class CalculateAverage_anitasv {
|
||||
private static final String FILE = "./measurements.txt";
|
||||
|
||||
private record Shard(MemorySegment mmapMemory,
|
||||
long chunkStart, long chunkEnd) {
|
||||
|
||||
byte getByte(long address) {
|
||||
return mmapMemory.get(ValueLayout.JAVA_BYTE, address);
|
||||
}
|
||||
|
||||
long indexOf(long position, byte ch) {
|
||||
ByteBuffer buf = mmapMemory.asSlice(position,
|
||||
Math.min(128, mmapMemory.byteSize() - position))
|
||||
.asByteBuffer();
|
||||
while (buf.hasRemaining()) {
|
||||
if (buf.get() == ch) {
|
||||
return position + buf.position() - 1;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
byte[] getRange(long start, long end) {
|
||||
return mmapMemory.asSlice(start, end - start).toArray(ValueLayout.JAVA_BYTE);
|
||||
}
|
||||
|
||||
int parseDouble(long start, long end) {
|
||||
int normalized = 0;
|
||||
boolean sign = true;
|
||||
long index = start;
|
||||
if (getByte(index) == '-') {
|
||||
index++;
|
||||
sign = false;
|
||||
}
|
||||
boolean hasDot = false;
|
||||
for (; index < end; index++) {
|
||||
byte ch = getByte(index);
|
||||
if (ch != '.') {
|
||||
normalized = normalized * 10 + (ch - '0');
|
||||
} else {
|
||||
hasDot = true;
|
||||
}
|
||||
}
|
||||
if (!hasDot) {
|
||||
normalized *= 10;
|
||||
}
|
||||
if (!sign) {
|
||||
normalized = -normalized;
|
||||
}
|
||||
return normalized;
|
||||
}
|
||||
|
||||
public int computeHash(long position, long stationEnd) {
|
||||
ByteBuffer buf2 = mmapMemory.asSlice(position, stationEnd - position)
|
||||
.asByteBuffer();
|
||||
return buf2.hashCode();
|
||||
}
|
||||
|
||||
public boolean matches(byte[] existingStation, long start, long end) {
|
||||
ByteBuffer buf1 = ByteBuffer.wrap(existingStation);
|
||||
ByteBuffer buf2 = mmapMemory.asSlice(start, end - start).asByteBuffer();
|
||||
return buf1.equals(buf2);
|
||||
}
|
||||
}
|
||||
|
||||
private record ResultRow(byte[] station, IntSummaryStatistics statistics) {
|
||||
|
||||
public String toString() {
|
||||
return STR."\{new String(station, StandardCharsets.UTF_8)} : \{statToString(statistics)}";
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, IntSummaryStatistics> process(Shard shard) {
|
||||
HashMap<Integer, List<ResultRow>> result = new HashMap<>(1 << 14);
|
||||
|
||||
boolean skip = shard.chunkStart != 0;
|
||||
for (long position = shard.chunkStart; position < shard.chunkEnd; position++) {
|
||||
if (skip) {
|
||||
position = shard.indexOf(position, (byte) '\n');
|
||||
skip = false;
|
||||
}
|
||||
else {
|
||||
long stationEnd = shard.indexOf(position, (byte) ';');
|
||||
int hash = shard.computeHash(position, stationEnd);
|
||||
|
||||
long temperatureEnd = shard.indexOf(stationEnd + 1, (byte) '\n');
|
||||
int temperature = shard.parseDouble(stationEnd + 1, temperatureEnd);
|
||||
|
||||
List<ResultRow> collisions = result.get(hash);
|
||||
if (collisions == null) {
|
||||
collisions = new ArrayList<>();
|
||||
result.put(hash, collisions);
|
||||
}
|
||||
|
||||
boolean found = false;
|
||||
for (ResultRow existing : collisions) {
|
||||
byte[] existingStation = existing.station();
|
||||
if (shard.matches(existingStation, position, stationEnd)) {
|
||||
existing.statistics.accept(temperature);
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
IntSummaryStatistics stats = new IntSummaryStatistics();
|
||||
stats.accept(temperature);
|
||||
ResultRow rr = new ResultRow(shard.getRange(position, stationEnd), stats);
|
||||
collisions.add(rr);
|
||||
}
|
||||
position = temperatureEnd;
|
||||
}
|
||||
}
|
||||
|
||||
return result.values()
|
||||
.stream()
|
||||
.flatMap(Collection::stream)
|
||||
.map(rr -> new AbstractMap.SimpleImmutableEntry<>(
|
||||
new String(rr.station, StandardCharsets.UTF_8),
|
||||
rr.statistics))
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
|
||||
}
|
||||
|
||||
private static Map<String, IntSummaryStatistics> combineResults(List<Map<String, IntSummaryStatistics>> list) {
|
||||
|
||||
Map<String, IntSummaryStatistics> output = HashMap.newHashMap(1024);
|
||||
for (Map<String, IntSummaryStatistics> map : list) {
|
||||
for (Map.Entry<String, IntSummaryStatistics> entry : map.entrySet()) {
|
||||
output.compute(entry.getKey(), (ignore, val) -> {
|
||||
if (val == null) {
|
||||
return entry.getValue();
|
||||
}
|
||||
else {
|
||||
val.combine(entry.getValue());
|
||||
return val;
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return output;
|
||||
}
|
||||
|
||||
private static Map<String, IntSummaryStatistics> master(MemorySegment mmapMemory) {
|
||||
long totalBytes = mmapMemory.byteSize();
|
||||
int numWorkers = Runtime.getRuntime().availableProcessors();
|
||||
long chunkSize = Math.ceilDiv(totalBytes, numWorkers);
|
||||
return combineResults(IntStream.range(0, numWorkers)
|
||||
.parallel()
|
||||
.mapToObj(workerId -> {
|
||||
long chunkStart = workerId * chunkSize;
|
||||
long chunkEnd = Math.min(chunkStart + chunkSize + 1, totalBytes);
|
||||
return new Shard(mmapMemory, chunkStart, chunkEnd);
|
||||
})
|
||||
.map(CalculateAverage_anitasv::process)
|
||||
.toList());
|
||||
}
|
||||
|
||||
public static Map<String, IntSummaryStatistics> start() throws IOException {
|
||||
try (FileChannel fileChannel = FileChannel.open(Path.of(FILE),
|
||||
StandardOpenOption.READ)) {
|
||||
long fileSize = fileChannel.size();
|
||||
MemorySegment mmapMemory = fileChannel.map(
|
||||
FileChannel.MapMode.READ_ONLY,
|
||||
0, fileSize, Arena.global());
|
||||
return master(mmapMemory);
|
||||
}
|
||||
}
|
||||
|
||||
private static Map<String, String> toPrintMap(Map<String, IntSummaryStatistics> output) {
|
||||
Map<String, String> outputStr = new TreeMap<>();
|
||||
for (Map.Entry<String, IntSummaryStatistics> entry : output.entrySet()) {
|
||||
IntSummaryStatistics stat = entry.getValue();
|
||||
outputStr.put(entry.getKey(), statToString(stat));
|
||||
}
|
||||
return outputStr;
|
||||
}
|
||||
|
||||
private static String statToString(IntSummaryStatistics stat) {
|
||||
return STR."\{stat.getMin() / 10.0}/\{Math.round(stat.getAverage()) / 10.0}/\{stat.getMax() / 10.0}";
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
System.out.println(toPrintMap(start()));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user