martin2038: first submission (#665)
* first double as int * - hashcode * JAVA_OPTS empty * native * native * CalculateAverage_melgenek https://questdb.io/blog/building-faster-hash-table-high-performance-sql-joins/#fastmap-internals * mvn formatting * jvm model * 10k name * 10k name * round mean * limit ChunkSize smaller than Integer.MAX_VALUE --------- Co-authored-by: martin.cong <martin.cong@zhulinkeji.com>
This commit is contained in:
parent
ba20cd8439
commit
f02279df8c
30
calculate_average_martin2038.sh
Executable file
30
calculate_average_martin2038.sh
Executable file
@ -0,0 +1,30 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
|
||||
# Launch the solution: run the JVM build unless a GraalVM native image has
# already been built, in which case the native binary is executed instead.
if [ ! -f target/CalculateAverage_martin2038_image ]; then
    # No native image on disk: fall back to the JVM.
    echo "Chosing to run the app in JVM mode as no native image was found, use prepare_martin2038.sh to generate." >&2

    # Previously tried option sets, kept for reference:
    #JAVA_OPTS="--enable-preview"
    # JAVA_OPTS="-XX:-EnableJVMCI -Xms16g -Xmx16g -XX:+AlwaysPreTouch -XX:+UnlockExperimentalVMOptions -XX:+UseEpsilonGC"
    JAVA_OPTS=""

    # $JAVA_OPTS is intentionally unquoted so it word-splits into separate flags.
    java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_martin2038
else
    echo "Picking up existing native image 'target/CalculateAverage_martin2038_image', delete the file to select JVM mode." >&2
    target/CalculateAverage_martin2038_image
fi
|
26
prepare_martin2038.sh
Executable file
26
prepare_martin2038.sh
Executable file
@ -0,0 +1,26 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Copyright 2023 The original authors
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Initialise SDKMAN in this shell and switch to the GraalVM 21.0.2 JDK.
# The output of `sdk use` is redirected to stderr.
. "$HOME/.sdkman/bin/sdkman-init.sh"
sdk use java 21.0.2-graal >&2

##
# Native-image build, currently disabled. Uncomment the block below to build
# target/CalculateAverage_martin2038_image.
# NOTE(review): the guard tests "target/CalculateAverage_martin2038" while the
# image is written to "target/CalculateAverage_martin2038_image" - confirm the
# intended file name before re-enabling.
#if [ ! -f target/CalculateAverage_martin2038 ]; then
#    MAIN=dev.morling.onebrc.CalculateAverage_martin2038
#    NATIVE_IMAGE_OPTS="-H:+UnlockExperimentalVMOptions --initialize-at-build-time=$MAIN --gc=epsilon -O3 -march=native -R:MaxHeapSize=515m -H:-GenLoopSafepoints -H:InlineAllBonus=10 -H:-ParseRuntimeOptions"
#    native-image $NATIVE_IMAGE_OPTS -cp target/average-1.0.0-SNAPSHOT.jar -o target/CalculateAverage_martin2038_image $MAIN
#fi
|
@ -0,0 +1,337 @@
|
||||
/*
|
||||
* Copyright 2023 The original authors
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package dev.morling.onebrc;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.invoke.VarHandle;
|
||||
import java.nio.ByteOrder;
|
||||
import java.nio.MappedByteBuffer;
|
||||
import java.nio.channels.FileChannel.MapMode;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Computes min / mean / max per station from a {@code name;temperature} text file.
 *
 * <p>The file is cut into newline-aligned chunks, every chunk is memory-mapped and
 * aggregated on the parallel stream, and the per-chunk maps are merged and printed
 * sorted by station name. Temperatures are handled as integers scaled by ten
 * (tenths of a degree) to avoid floating-point parsing.
 */
public class CalculateAverage_martin2038 {

    private static final String FILE = "./measurements.txt";

    /**
     * Size of the per-chunk scratch buffer that receives both station-name bytes
     * and temperature text. Presumably sized for the challenge's 100-byte name
     * limit plus slack - confirm against the input specification.
     */
    private static final int MAX_NAME_LENGTH = 110;

    /** Running min / max / sum / count of one station, in tenths of a degree. */
    private static class MeasurementAggregator {
        private int min = Integer.MAX_VALUE;
        private int max = Integer.MIN_VALUE;
        private long sum;
        private int count;

        /** Records a single reading (already scaled by ten). */
        void update(int temp) {
            update(1, temp, temp, temp);
        }

        /** Folds {@code cnt} readings with the given sum and extremes into this aggregate. */
        void update(int cnt, long sm, int min, int max) {
            sum += sm;
            count += cnt;
            if (this.min > min) {
                this.min = min;
            }
            if (this.max < max) {
                this.max = max;
            }
        }

        /** Folds another aggregate into this one. */
        void merge(MeasurementAggregator it) {
            update(it.count, it.sum, it.min, it.max);
        }

        /** Renders {@code min/mean/max} with one decimal place. */
        @Override
        public String toString() {
            var mean = this.sum / 10.0 / this.count;
            return (min / 10f) + "/" + Math.round(mean * 10) / 10f + "/" + (max / 10f);
        }
    }

    /**
     * Entry point: aggregates {@link #FILE} and prints the result to standard output.
     * Nothing is printed when the file is empty.
     *
     * @param args ignored
     * @throws IOException if the file cannot be opened or split into chunks
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the file (and with it the channel) once the
        // whole pipeline has finished.
        try (var file = new RandomAccessFile(FILE, "r")) {
            var fc = file.getChannel();
            split(file).stream().parallel()
                    .map(ck -> aggregateChunk(fc, ck))
                    .reduce(CalculateAverage_martin2038::reduceMap)
                    .ifPresent(map -> System.out.println(formatResult(map)));
        }
    }

    /** Memory-maps one chunk and aggregates every {@code name;temperature} line in it. */
    private static HashMap<StrFastHashKey, MeasurementAggregator> aggregateChunk(java.nio.channels.FileChannel channel, FileChunk chunk) {
        // StrFastHashKey keys measured ~500 ms faster than String keys.
        var map = new HashMap<StrFastHashKey, MeasurementAggregator>(200);
        final MappedByteBuffer mb;
        try {
            mb = channel.map(MapMode.READ_ONLY, chunk.start(), chunk.length());
        }
        catch (IOException e) {
            throw new RuntimeException("Failed to map " + chunk, e);
        }
        var buff = new byte[MAX_NAME_LENGTH];
        while (mb.hasRemaining()) {
            var name = readNextHashKey(buff, mb);
            var temp = readNextInt10Times(buff, mb);
            add2map(map, name, temp);
        }
        return map;
    }

    /** Renders the aggregates as {@code {name=min/mean/max, ...}} sorted by key; {@code {}} when empty. */
    private static String formatResult(Map<StrFastHashKey, MeasurementAggregator> map) {
        var sb = new StringBuilder(map.size() * 100 + 2);
        sb.append('{');
        map.entrySet().stream().sorted(Map.Entry.comparingByKey()).forEachOrdered(kv -> {
            // Anything beyond the opening brace means an entry was already written.
            if (sb.length() > 1) {
                sb.append(", ");
            }
            sb.append(kv.getKey()).append('=').append(kv.getValue());
        });
        return sb.append('}').toString();
    }

    /**
     * Merges every entry of {@code aMap} into {@code bMap}.
     *
     * @return {@code bMap}, mutated in place; aggregator instances of {@code aMap}
     *         may be adopted by it
     */
    static <Key> HashMap<Key, MeasurementAggregator> reduceMap(HashMap<Key, MeasurementAggregator> aMap, HashMap<Key, MeasurementAggregator> bMap) {
        aMap.forEach((k, v) -> {
            var existing = bMap.putIfAbsent(k, v);
            if (existing != null) {
                existing.merge(v);
            }
        });
        return bMap;
    }

    /** Adds one reading for {@code name}, creating the aggregator on first sight. */
    static <Key> void add2map(Map<Key, MeasurementAggregator> map, Key name, int temp) {
        // Explicit get/put measured about one second faster than computeIfAbsent.
        var agg = map.get(name);
        if (null == agg) {
            agg = new MeasurementAggregator();
            map.put(name, agg);
        }
        agg.update(temp);
    }

    /** A byte range of the input file: {@code length} bytes beginning at {@code start}. */
    record FileChunk(long start, long length) {
    }

    /**
     * Splits the file into contiguous chunks that together cover it exactly.
     * Every chunk ends right after a {@code '\n'}, except that the final chunk
     * ends at end-of-file when the file has no trailing newline.
     *
     * @param file the file to split; its file pointer is moved
     * @return the chunks in file order; empty for an empty file
     * @throws IOException if the file cannot be read
     */
    static List<FileChunk> split(RandomAccessFile file) throws IOException {
        long total = file.length();
        if (total == 0) {
            return new ArrayList<>();
        }
        // FileChannel.map rejects sizes above Integer.MAX_VALUE
        // ("IllegalArgumentException: Size exceeds Integer.MAX_VALUE"), so use at
        // least enough chunks to keep the average chunk below that limit.
        var threadNum = Math.max((int) (total / Integer.MAX_VALUE + 1), Runtime.getRuntime().availableProcessors());
        long avgChunkSize = total / threadNum;
        long lastStart = 0;
        var list = new ArrayList<FileChunk>(threadNum);
        for (var i = 0; i < threadNum - 1; i++) {
            long end = lastStart + avgChunkSize;
            file.seek(end);
            // read() reports EOF as -1 instead of throwing like readByte(), so a
            // missing trailing newline or a seek past the end no longer aborts.
            int c;
            while ((c = file.read()) != -1 && c != '\n') {
                end++;
            }
            if (c == '\n') {
                // include the '\n'
                end++;
            }
            // The seek target itself may lie beyond the end of a small file.
            end = Math.min(end, total);
            list.add(new FileChunk(lastStart, end - lastStart));
            lastStart = end;
            if (lastStart >= total) {
                return list;
            }
        }
        list.add(new FileChunk(lastStart, total - lastStart));
        return list;
    }

    /**
     * Copies the bytes up to the next {@code ';'} into {@code buf} and consumes the
     * separator. The first byte is taken unconditionally, i.e. a name is assumed to
     * be at least one byte long.
     *
     * @return the number of name bytes stored in {@code buf}
     */
    private static int readNameBytes(byte[] buf, MappedByteBuffer mb) {
        buf[0] = mb.get();
        int len = 1;
        byte b;
        while ((b = mb.get()) != ';') {
            buf[len++] = b;
        }
        return len;
    }

    /** Reads the station name at the buffer position as a hash key and consumes the {@code ';'}. */
    static StrFastHashKey readNextHashKey(byte[] buf, MappedByteBuffer mb) {
        return new StrFastHashKey(buf, readNameBytes(buf, mb));
    }

    /** Reads the station name at the buffer position as a UTF-8 string and consumes the {@code ';'}. */
    static String readNextString(byte[] buf, MappedByteBuffer mb) {
        int len = readNameBytes(buf, mb);
        return new String(buf, 0, len, java.nio.charset.StandardCharsets.UTF_8);
    }

    /**
     * Parses a decimal reading with exactly one fractional digit, terminated by
     * {@code '\n'}, and returns it multiplied by ten (for example {@code "-3.2"}
     * yields {@code -32}). The terminating newline is consumed.
     *
     * <p>Adapted from CalculateAverage_3j5a; replacing Double.parse with this cut
     * the author's measured run time from 38 s to 5418 ms.
     */
    static int readNextInt10Times(byte[] buf, MappedByteBuffer mb) {
        // The shortest possible reading is "d.d", so three bytes can be bulk-read.
        int len = 3;
        mb.get(buf, 0, len);
        byte b;
        while ((b = mb.get()) != '\n') {
            buf[len++] = b;
        }
        int i = len - 1;
        int temperature = buf[i] - '0'; // fractional digit
        i--; // index of the dot, which is skipped
        int base = 10;
        while (i > 0) {
            b = buf[--i];
            if (b == '-') {
                // the sign is the first byte and therefore handled last
                temperature = -temperature;
            }
            else {
                temperature += base * (b - '0');
                // next decimal place (was "base *= base", which jumped from 100 to 10000)
                base *= 10;
            }
        }
        return temperature;
    }

    /**
     * Map key wrapping the raw bytes of a station name with a precomputed FNV-1a hash.
     * Ordering and {@link #toString()} use the UTF-8 decoded name.
     */
    static class StrFastHashKey implements Comparable<StrFastHashKey> {
        final byte[] name;
        final int hash;

        /** Decoded name, created on first use. */
        String nameStr;

        /** Copies the first {@code size} bytes of {@code buf}, so the caller may reuse the buffer. */
        StrFastHashKey(byte[] buf, int size) {
            name = Arrays.copyOf(buf, size);
            // FNV-1a measured 100+ ms faster than calculateHash.
            hash = hashFNV1a(name, size);
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            // instanceof also rejects null and foreign types, which the previous
            // blind cast turned into NullPointerException / ClassCastException.
            return o instanceof StrFastHashKey that && hash == that.hash && Arrays.equals(name, that.name);
        }

        @Override
        public int hashCode() {
            return hash;
        }

        @Override
        public String toString() {
            if (null == nameStr) {
                nameStr = new String(name, java.nio.charset.StandardCharsets.UTF_8);
            }
            return nameStr;
        }

        @Override
        public int compareTo(StrFastHashKey o) {
            return toString().compareTo(o.toString());
        }
    }

    private static final VarHandle LONG_VIEW = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.nativeOrder())
            .withInvokeExactBehavior();
    private static final VarHandle INT_VIEW = MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.nativeOrder())
            .withInvokeExactBehavior();

    /**
     * This is a prime number that gives pretty
     * <a href="https://vanilla-java.github.io/2018/08/15/Looking-at-randomness-and-performance-for-hash-codes.html">good hash distributions</a>
     * on the data in this challenge.
     */
    private static final long RANDOM_PRIME = 0x7A646E4D;

    /**
     * Alternative hash that consumes the input in 8-byte, then 4-byte, then
     * single-byte steps. Currently unused: {@link StrFastHashKey} hashes with
     * {@link #hashFNV1a(byte[], int)} instead.
     *
     * <p>The hash calculation is inspired by
     * <a href="https://questdb.io/blog/building-faster-hash-table-high-performance-sql-joins/#fastmap-internals">QuestDB FastMap</a>
     */
    private static int calculateHash(byte[] buffer, int startPosition, int endPosition) {
        long hash = 0;

        int position = startPosition;
        for (; position + Long.BYTES <= endPosition; position += Long.BYTES) {
            long value = (long) LONG_VIEW.get(buffer, position);
            hash = hash * RANDOM_PRIME + value;
        }

        if (position + Integer.BYTES <= endPosition) {
            int value = (int) INT_VIEW.get(buffer, position);
            hash = hash * RANDOM_PRIME + value;
            position += Integer.BYTES;
        }

        for (; position <= endPosition; position++) {
            hash = hash * RANDOM_PRIME + buffer[position];
        }
        hash = hash * RANDOM_PRIME;
        return (int) hash ^ (int) (hash >>> 32);
    }

    private static final int FNV1_32_INIT = 0x811c9dc5;
    private static final int FNV1_PRIME_32 = 16777619;

    /**
     * FNV1a 32 bit variant, see
     * https://github.com/prasanthj/hasher/blob/master/src/main/java/hasher/FNV1a.java
     *
     * @param data - input byte array
     * @param length - number of leading bytes of {@code data} to hash
     * @return - hashcode
     */
    public static int hashFNV1a(byte[] data, int length) {
        int hash = FNV1_32_INIT;
        for (int i = 0; i < length; i++) {
            hash ^= (data[i] & 0xff);
            hash *= FNV1_PRIME_32;
        }
        return hash;
    }
}
|
Loading…
Reference in New Issue
Block a user