Richard Startin: Adopt @spullara's double parsing code;
* increase chunk size * simplify and tune parameters
This commit is contained in:
parent
a09fa928db
commit
c3411f6023
@ -52,6 +52,26 @@ public class CalculateAverage_richardstartin {
|
||||
return new String(bytes, StandardCharsets.UTF_8);
|
||||
}
|
||||
|
||||
static double parseTemperature(ByteBuffer slice) {
|
||||
// credit: adapted from spullara's submission
|
||||
int value = 0;
|
||||
int negative = 1;
|
||||
int i = 0;
|
||||
while (i != slice.limit()) {
|
||||
byte b = slice.get(i++);
|
||||
switch (b) {
|
||||
case '-':
|
||||
negative = -1;
|
||||
case '.':
|
||||
break;
|
||||
default:
|
||||
value = 10 * value + (b - '0');
|
||||
}
|
||||
}
|
||||
value *= negative;
|
||||
return value / 10.0;
|
||||
}
|
||||
|
||||
@FunctionalInterface
|
||||
interface IndexedStringConsumer {
|
||||
void accept(String value, int index);
|
||||
@ -60,7 +80,7 @@ public class CalculateAverage_richardstartin {
|
||||
/** Maps text to an integer encoding. Adapted from async-profiler. */
|
||||
public static class Dictionary {
|
||||
|
||||
private static final int ROW_BITS = 7;
|
||||
private static final int ROW_BITS = 12;
|
||||
private static final int ROWS = (1 << ROW_BITS);
|
||||
private static final int CELLS = 3;
|
||||
private static final int TABLE_CAPACITY = (ROWS * CELLS);
|
||||
@ -90,10 +110,10 @@ public class CalculateAverage_richardstartin {
|
||||
forEach(this.table, consumer);
|
||||
}
|
||||
|
||||
public int encode(long hash, ByteBuffer slice) {
|
||||
public int encode(int hash, ByteBuffer slice) {
|
||||
Table table = this.table;
|
||||
while (true) {
|
||||
int rowIndex = (int)(Math.abs(hash) % ROWS);
|
||||
int rowIndex = Math.abs(hash) % ROWS;
|
||||
Row row = table.rows[rowIndex];
|
||||
for (int c = 0; c < CELLS; c++) {
|
||||
ByteBuffer storedKey = row.keys.get(c);
|
||||
@ -111,7 +131,7 @@ public class CalculateAverage_richardstartin {
|
||||
}
|
||||
}
|
||||
table = row.getOrCreateNextTable(this::nextBaseIndex);
|
||||
hash = Long.rotateRight(hash, ROW_BITS);
|
||||
hash = Integer.rotateRight(hash, ROW_BITS);
|
||||
}
|
||||
}
|
||||
|
||||
@ -207,43 +227,6 @@ public class CalculateAverage_richardstartin {
|
||||
return buffer.limit();
|
||||
}
|
||||
|
||||
private static long hash(ByteBuffer slice) {
|
||||
long hash = slice.limit() + PRIME_5 + 0x123456789abcdef1L;
|
||||
int i = 0;
|
||||
for (; i + Long.BYTES < slice.limit(); i += Long.BYTES) {
|
||||
hash = hashLong(hash, slice.getLong(i));
|
||||
}
|
||||
long part = 0L;
|
||||
for (; i < slice.limit(); i++) {
|
||||
part = (part >>> 8) | ((slice.get(i) & 0xFFL) << 56);
|
||||
}
|
||||
hash = hashLong(hash, part);
|
||||
return mix(hash);
|
||||
}
|
||||
|
||||
static final long PRIME_1 = 0x9E3779B185EBCA87L;
|
||||
static final long PRIME_2 = 0xC2B2AE3D27D4EB4FL;
|
||||
static final long PRIME_3 = 0x165667B19E3779F9L;
|
||||
static final long PRIME_4 = 0x85EBCA77C2B2AE63L;
|
||||
static final long PRIME_5 = 0x27D4EB2F165667C5L;
|
||||
|
||||
private static long hashLong(long hash, long k) {
|
||||
k *= PRIME_2;
|
||||
k = Long.rotateLeft(k, 31);
|
||||
k *= PRIME_1;
|
||||
hash ^= k;
|
||||
return Long.rotateLeft(hash, 27) * PRIME_1 + PRIME_4;
|
||||
}
|
||||
|
||||
private static long mix(long hash) {
|
||||
hash ^= hash >>> 33;
|
||||
hash *= PRIME_2;
|
||||
hash ^= hash >>> 29;
|
||||
hash *= PRIME_3;
|
||||
hash ^= hash >>> 32;
|
||||
return hash;
|
||||
}
|
||||
|
||||
static class Page {
|
||||
|
||||
static final int PAGE_SIZE = 1024;
|
||||
@ -311,16 +294,12 @@ public class CalculateAverage_richardstartin {
|
||||
ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN);
|
||||
// find the global dictionary code to aggregate,
|
||||
// making this code global allows easy merging
|
||||
int dictId = dictionary.encode(hash(key), key);
|
||||
int dictId = dictionary.encode(key.hashCode(), key);
|
||||
|
||||
offset = nextSeparator + 1;
|
||||
int newLine = findIndexOf(slice, offset, NEW_LINE);
|
||||
// parse the double
|
||||
// todo do this without allocating a string, could use a fast parsing falgorithm
|
||||
var bytes = new byte[newLine - offset];
|
||||
slice.get(offset, bytes);
|
||||
var string = new String(bytes, StandardCharsets.US_ASCII);
|
||||
double d = Double.parseDouble(string);
|
||||
double d = parseTemperature(slice.slice(offset, newLine - offset));
|
||||
|
||||
Page.update(pages, dictId, d);
|
||||
|
||||
@ -351,7 +330,7 @@ public class CalculateAverage_richardstartin {
|
||||
protected double[][] compute() {
|
||||
if (min == max) {
|
||||
// fixme - hardcoded to problem size
|
||||
var pages = new double[1024][];
|
||||
var pages = new double[600][];
|
||||
var slice = slices.get(min);
|
||||
computeSlice(slice, pages);
|
||||
return pages;
|
||||
@ -368,7 +347,7 @@ public class CalculateAverage_richardstartin {
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
int maxChunkSize = 10 << 20; // 10MiB
|
||||
int maxChunkSize = 250 << 20; // 250MiB
|
||||
try (var raf = new RandomAccessFile(FILE, "r");
|
||||
var channel = raf.getChannel()) {
|
||||
long size = channel.size();
|
||||
|
Loading…
Reference in New Issue
Block a user