First version - roman_r_m (#193)

* initial commit

* - use loop
- use mutable object to store results

* get rid of regex

* Do not allocate measurement objects

* MMap + custom double parsing ~ 1:30 (down from ~ 2:05)

* HashMap for accumulation and only sort at the end - 1:05

* MMap the whole file

* Use graal

* no GC

* Store results in an array list to avoid double map lookup

* Adjust max buf size

* Manual parsing number to long

* Add --enable-preview

* remove buffer size check (has no effect on performance)

* fix min & max initialization

* do not check for \r

* Revert "do not check for \r"

This reverts commit 9da1f574bf6261ea49c353488d3b4673cad3ce6e.

* Optimise parsing. Now completes in 31 sec down from ~43

* trying to parse numbers faster

* use open address hash table instead of the standard HashMap

* formatting

* Rename the script to match github username (change underscores to slashes)
Enable transparent huge pages, seems to improve by ~2 sec

* Revert "formatting"

This reverts commit 4e90797d2a729ed7385c9000c85cc7e87d935f96.

* Revert "use open address hash table instead of the standard HashMap"

This reverts commit c784b55f61e48f548b2623e5c8958c9b283cae14.

* add prepare_roman-r-m.sh

* SWAR tricks to find semicolon (-2 seconds ro run time)

* remove time call

* fix test

* Parallel version (~6.5 seconds)
This commit is contained in:
Roman Musin 2024-01-11 11:29:08 +00:00 committed by GitHub
parent 52c490cc24
commit 4b3f959812
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 318 additions and 0 deletions

19
calculate_average_roman-r-m.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
JAVA_OPTS="--enable-preview -XX:+UseTransparentHugePages"
java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_roman_r_m

19
prepare_roman-r-m.sh Executable file
View File

@ -0,0 +1,19 @@
#!/bin/sh
#
# Copyright 2023 The original authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
source "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.1-graal 1>&2

View File

@ -0,0 +1,280 @@
/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.File;
import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.ValueLayout;
import java.nio.channels.FileChannel;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.stream.IntStream;
public class CalculateAverage_roman_r_m {
public static final int DOT_3_RD_BYTE_MASK = (byte) '.' << 16;
private static final String FILE = "./measurements.txt";
private static MemorySegment ms;
// based on http://0x80.pl/notesen/2023-03-06-swar-find-any.html
static long hasZeroByte(long l) {
return ((l - 0x0101010101010101L) & ~(l) & 0x8080808080808080L);
}
static long firstSetByteIndex(long l) {
return ((((l - 1) & 0x101010101010101L) * 0x101010101010101L) >> 56) - 1;
}
static long broadcast(byte b) {
return 0x101010101010101L * b;
}
static long SEMICOLON_MASK = broadcast((byte) ';');
static long LINE_END_MASK = broadcast((byte) '\n');
static long find(long l, long mask) {
long xor = l ^ mask;
long match = hasZeroByte(xor);
return match != 0 ? firstSetByteIndex(match) : -1;
}
static long nextNewline(long from) {
long start = from;
long i;
long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
while ((i = find(next, LINE_END_MASK)) < 0) {
start += 8;
next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, start);
}
return start + i;
}
public static void main(String[] args) throws IOException {
long fileSize = new File(FILE).length();
var channel = FileChannel.open(Paths.get(FILE));
ms = channel.map(FileChannel.MapMode.READ_ONLY, 0, fileSize, Arena.ofAuto());
int numThreads = fileSize > Integer.MAX_VALUE ? Runtime.getRuntime().availableProcessors() : 1;
long chunk = fileSize / numThreads;
var result = IntStream.range(0, numThreads)
.parallel()
.mapToObj(i -> {
boolean lastChunk = i == numThreads - 1;
long chunkStart = i == 0 ? 0 : nextNewline(i * chunk) + 1;
long chunkEnd = lastChunk ? fileSize : nextNewline((i + 1) * chunk);
var resultStore = new ResultStore();
var station = new ByteString();
long offset = chunkStart;
while (offset < chunkEnd) {
long start = offset;
long pos;
if (!lastChunk || chunkEnd - offset >= 8) {
long next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
while ((pos = find(next, SEMICOLON_MASK)) < 0) {
offset += 8;
if (!lastChunk || fileSize - offset >= 8) {
next = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
}
else {
while (ms.get(ValueLayout.JAVA_BYTE, offset + pos) != ';') {
pos++;
}
break;
}
}
}
else {
pos = 0;
while (ms.get(ValueLayout.JAVA_BYTE, offset + pos) != ';') {
pos++;
}
}
offset += pos;
int len = (int) (offset - start);
// TODO can we not copy and use a reference into the memory segment to perform table lookup?
MemorySegment.copy(ms, ValueLayout.JAVA_BYTE, start, station.buf, 0, len);
station.len = len;
station.hash = 0;
offset++;
long val;
boolean neg;
if (!lastChunk || fileSize - offset >= 8) {
long encodedVal = ms.get(ValueLayout.JAVA_LONG_UNALIGNED, offset);
neg = (encodedVal & (byte) '-') == (byte) '-';
if (neg) {
encodedVal >>= 8;
offset++;
}
if ((encodedVal & DOT_3_RD_BYTE_MASK) == DOT_3_RD_BYTE_MASK) {
val = (encodedVal & 0xFF - 0x30) * 100 + (encodedVal >> 8 & 0xFF - 0x30) * 10 + (encodedVal >> 24 & 0xFF - 0x30);
offset += 5;
}
else {
// based on http://0x80.pl/articles/simd-parsing-int-sequences.html#parsing-and-conversion-of-signed-numbers
val = Long.compress(encodedVal, 0xFF00FFL) - 0x303030;
val = ((val * 2561) >> 8) & 0xff;
offset += 4;
}
}
else {
neg = ms.get(ValueLayout.JAVA_BYTE, offset) == '-';
if (neg) {
offset++;
}
val = ms.get(ValueLayout.JAVA_BYTE, offset++) - '0';
byte b;
while ((b = ms.get(ValueLayout.JAVA_BYTE, offset++)) != '.') {
val = val * 10 + (b - '0');
}
b = ms.get(ValueLayout.JAVA_BYTE, offset);
val = val * 10 + (b - '0');
offset += 2;
}
if (neg) {
val = -val;
}
var a = resultStore.get(station);
a.min = Math.min(a.min, val);
a.max = Math.max(a.max, val);
a.sum += val;
a.count++;
}
return resultStore.toMap();
}).reduce((m1, m2) -> {
m2.forEach((k, v) -> m1.merge(k, v, ResultRow::merge));
return m1;
});
System.out.println(result.get());
}
static final class ByteString {
private byte[] buf = new byte[100];
private int len = 0;
private int hash = 0;
@Override
public String toString() {
return new String(buf, 0, len);
}
public ByteString copy() {
var copy = new ByteString();
copy.len = this.len;
copy.hash = this.hash;
if (copy.buf.length < this.buf.length) {
copy.buf = new byte[this.buf.length];
}
System.arraycopy(this.buf, 0, copy.buf, 0, this.len);
return copy;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (o == null || getClass() != o.getClass())
return false;
ByteString that = (ByteString) o;
if (len != that.len)
return false;
// TODO use Vector
for (int i = 0; i < len; i++) {
if (buf[i] != that.buf[i]) {
return false;
}
}
return true;
}
@Override
public int hashCode() {
if (hash == 0) {
for (int i = 0; i < len; i++) {
hash = 31 * hash + (buf[i] & 255);
}
}
return hash;
}
}
private static final class ResultRow {
long min = 1000;
long sum = 0;
long max = -1000;
int count = 0;
public String toString() {
return round(min / 10.0) + "/" + round(sum / 10.0 / count) + "/" + round(max / 10.0);
}
private double round(double value) {
return Math.round(value * 10.0) / 10.0;
}
public ResultRow merge(ResultRow other) {
this.min = Math.min(this.min, other.min);
this.max = Math.max(this.max, other.max);
this.sum += other.sum;
this.count += other.count;
return this;
}
}
static class ResultStore {
private final ArrayList<ResultRow> results = new ArrayList<>(10000);
private final Map<ByteString, Integer> indices = new HashMap<>(10000);
ResultRow get(ByteString s) {
var idx = indices.get(s);
if (idx != null) {
return results.get(idx);
}
else {
ResultRow next = new ResultRow();
results.add(next);
indices.put(s.copy(), results.size() - 1);
return next;
}
}
TreeMap<String, ResultRow> toMap() {
var result = new TreeMap<String, ResultRow>();
indices.forEach((name, idx) -> result.put(name.toString(), results.get(idx)));
return result;
}
}
}