Richard Startin: Adopt @spullara's double parsing code;

* increase chunk size
* simplify and tune parameters
This commit is contained in:
Richard Startin 2024-01-04 17:19:56 +00:00 committed by GitHub
parent a09fa928db
commit c3411f6023
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -52,6 +52,26 @@ public class CalculateAverage_richardstartin {
return new String(bytes, StandardCharsets.UTF_8);
}
static double parseTemperature(ByteBuffer slice) {
// credit: adapted from spullara's submission
int value = 0;
int negative = 1;
int i = 0;
while (i != slice.limit()) {
byte b = slice.get(i++);
switch (b) {
case '-':
negative = -1;
case '.':
break;
default:
value = 10 * value + (b - '0');
}
}
value *= negative;
return value / 10.0;
}
@FunctionalInterface
interface IndexedStringConsumer {
void accept(String value, int index);
@ -60,7 +80,7 @@ public class CalculateAverage_richardstartin {
/** Maps text to an integer encoding. Adapted from async-profiler. */
public static class Dictionary {
private static final int ROW_BITS = 7;
private static final int ROW_BITS = 12;
private static final int ROWS = (1 << ROW_BITS);
private static final int CELLS = 3;
private static final int TABLE_CAPACITY = (ROWS * CELLS);
@ -90,10 +110,10 @@ public class CalculateAverage_richardstartin {
forEach(this.table, consumer);
}
public int encode(long hash, ByteBuffer slice) {
public int encode(int hash, ByteBuffer slice) {
Table table = this.table;
while (true) {
int rowIndex = (int)(Math.abs(hash) % ROWS);
int rowIndex = Math.abs(hash) % ROWS;
Row row = table.rows[rowIndex];
for (int c = 0; c < CELLS; c++) {
ByteBuffer storedKey = row.keys.get(c);
@ -111,7 +131,7 @@ public class CalculateAverage_richardstartin {
}
}
table = row.getOrCreateNextTable(this::nextBaseIndex);
hash = Long.rotateRight(hash, ROW_BITS);
hash = Integer.rotateRight(hash, ROW_BITS);
}
}
@ -207,43 +227,6 @@ public class CalculateAverage_richardstartin {
return buffer.limit();
}
private static long hash(ByteBuffer slice) {
long hash = slice.limit() + PRIME_5 + 0x123456789abcdef1L;
int i = 0;
for (; i + Long.BYTES < slice.limit(); i += Long.BYTES) {
hash = hashLong(hash, slice.getLong(i));
}
long part = 0L;
for (; i < slice.limit(); i++) {
part = (part >>> 8) | ((slice.get(i) & 0xFFL) << 56);
}
hash = hashLong(hash, part);
return mix(hash);
}
static final long PRIME_1 = 0x9E3779B185EBCA87L;
static final long PRIME_2 = 0xC2B2AE3D27D4EB4FL;
static final long PRIME_3 = 0x165667B19E3779F9L;
static final long PRIME_4 = 0x85EBCA77C2B2AE63L;
static final long PRIME_5 = 0x27D4EB2F165667C5L;
private static long hashLong(long hash, long k) {
k *= PRIME_2;
k = Long.rotateLeft(k, 31);
k *= PRIME_1;
hash ^= k;
return Long.rotateLeft(hash, 27) * PRIME_1 + PRIME_4;
}
private static long mix(long hash) {
hash ^= hash >>> 33;
hash *= PRIME_2;
hash ^= hash >>> 29;
hash *= PRIME_3;
hash ^= hash >>> 32;
return hash;
}
static class Page {
static final int PAGE_SIZE = 1024;
@ -311,16 +294,12 @@ public class CalculateAverage_richardstartin {
ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN);
// find the global dictionary code to aggregate,
// making this code global allows easy merging
int dictId = dictionary.encode(hash(key), key);
int dictId = dictionary.encode(key.hashCode(), key);
offset = nextSeparator + 1;
int newLine = findIndexOf(slice, offset, NEW_LINE);
// parse the double
// todo do this without allocating a string, could use a fast parsing falgorithm
var bytes = new byte[newLine - offset];
slice.get(offset, bytes);
var string = new String(bytes, StandardCharsets.US_ASCII);
double d = Double.parseDouble(string);
double d = parseTemperature(slice.slice(offset, newLine - offset));
Page.update(pages, dictId, d);
@ -351,7 +330,7 @@ public class CalculateAverage_richardstartin {
protected double[][] compute() {
if (min == max) {
// fixme - hardcoded to problem size
var pages = new double[1024][];
var pages = new double[600][];
var slice = slices.get(min);
computeSlice(slice, pages);
return pages;
@ -368,7 +347,7 @@ public class CalculateAverage_richardstartin {
}
public static void main(String[] args) throws IOException {
int maxChunkSize = 10 << 20; // 10MiB
int maxChunkSize = 250 << 20; // 250MiB
try (var raf = new RandomAccessFile(FILE, "r");
var channel = raf.getChannel()) {
long size = channel.size();