Richard Startin: Adopt @spullara's double parsing code;
* increase chunk size * simplify and tune parameters
This commit is contained in:
		@@ -52,6 +52,26 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
        return new String(bytes, StandardCharsets.UTF_8);
 | 
					        return new String(bytes, StandardCharsets.UTF_8);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    static double parseTemperature(ByteBuffer slice) {
 | 
				
			||||||
 | 
					        // credit: adapted from spullara's submission
 | 
				
			||||||
 | 
					        int value = 0;
 | 
				
			||||||
 | 
					        int negative = 1;
 | 
				
			||||||
 | 
					        int i = 0;
 | 
				
			||||||
 | 
					        while (i != slice.limit()) {
 | 
				
			||||||
 | 
					            byte b = slice.get(i++);
 | 
				
			||||||
 | 
					            switch (b) {
 | 
				
			||||||
 | 
					                case '-':
 | 
				
			||||||
 | 
					                    negative = -1;
 | 
				
			||||||
 | 
					                case '.':
 | 
				
			||||||
 | 
					                    break;
 | 
				
			||||||
 | 
					                default:
 | 
				
			||||||
 | 
					                    value = 10 * value + (b - '0');
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        value *= negative;
 | 
				
			||||||
 | 
					        return value / 10.0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @FunctionalInterface
 | 
					    @FunctionalInterface
 | 
				
			||||||
    interface IndexedStringConsumer {
 | 
					    interface IndexedStringConsumer {
 | 
				
			||||||
        void accept(String value, int index);
 | 
					        void accept(String value, int index);
 | 
				
			||||||
@@ -60,7 +80,7 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
    /** Maps text to an integer encoding. Adapted from async-profiler. */
 | 
					    /** Maps text to an integer encoding. Adapted from async-profiler. */
 | 
				
			||||||
    public static class Dictionary {
 | 
					    public static class Dictionary {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        private static final int ROW_BITS = 7;
 | 
					        private static final int ROW_BITS = 12;
 | 
				
			||||||
        private static final int ROWS = (1 << ROW_BITS);
 | 
					        private static final int ROWS = (1 << ROW_BITS);
 | 
				
			||||||
        private static final int CELLS = 3;
 | 
					        private static final int CELLS = 3;
 | 
				
			||||||
        private static final int TABLE_CAPACITY = (ROWS * CELLS);
 | 
					        private static final int TABLE_CAPACITY = (ROWS * CELLS);
 | 
				
			||||||
@@ -90,10 +110,10 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
            forEach(this.table, consumer);
 | 
					            forEach(this.table, consumer);
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        public int encode(long hash, ByteBuffer slice) {
 | 
					        public int encode(int hash, ByteBuffer slice) {
 | 
				
			||||||
            Table table = this.table;
 | 
					            Table table = this.table;
 | 
				
			||||||
            while (true) {
 | 
					            while (true) {
 | 
				
			||||||
                int rowIndex = (int)(Math.abs(hash) % ROWS);
 | 
					                int rowIndex = Math.abs(hash) % ROWS;
 | 
				
			||||||
                Row row = table.rows[rowIndex];
 | 
					                Row row = table.rows[rowIndex];
 | 
				
			||||||
                for (int c = 0; c < CELLS; c++) {
 | 
					                for (int c = 0; c < CELLS; c++) {
 | 
				
			||||||
                    ByteBuffer storedKey = row.keys.get(c);
 | 
					                    ByteBuffer storedKey = row.keys.get(c);
 | 
				
			||||||
@@ -111,7 +131,7 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                table = row.getOrCreateNextTable(this::nextBaseIndex);
 | 
					                table = row.getOrCreateNextTable(this::nextBaseIndex);
 | 
				
			||||||
                hash = Long.rotateRight(hash, ROW_BITS);
 | 
					                hash = Integer.rotateRight(hash, ROW_BITS);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -207,43 +227,6 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
        return buffer.limit();
 | 
					        return buffer.limit();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    private static long hash(ByteBuffer slice) {
 | 
					 | 
				
			||||||
        long hash = slice.limit() + PRIME_5 + 0x123456789abcdef1L;
 | 
					 | 
				
			||||||
        int i = 0;
 | 
					 | 
				
			||||||
        for (; i + Long.BYTES < slice.limit(); i += Long.BYTES) {
 | 
					 | 
				
			||||||
            hash = hashLong(hash, slice.getLong(i));
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        long part = 0L;
 | 
					 | 
				
			||||||
        for (; i < slice.limit(); i++) {
 | 
					 | 
				
			||||||
            part = (part >>> 8) | ((slice.get(i) & 0xFFL) << 56);
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        hash = hashLong(hash, part);
 | 
					 | 
				
			||||||
        return mix(hash);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    static final long PRIME_1 = 0x9E3779B185EBCA87L;
 | 
					 | 
				
			||||||
    static final long PRIME_2 = 0xC2B2AE3D27D4EB4FL;
 | 
					 | 
				
			||||||
    static final long PRIME_3 = 0x165667B19E3779F9L;
 | 
					 | 
				
			||||||
    static final long PRIME_4 = 0x85EBCA77C2B2AE63L;
 | 
					 | 
				
			||||||
    static final long PRIME_5 = 0x27D4EB2F165667C5L;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    private static long hashLong(long hash, long k) {
 | 
					 | 
				
			||||||
        k *= PRIME_2;
 | 
					 | 
				
			||||||
        k = Long.rotateLeft(k, 31);
 | 
					 | 
				
			||||||
        k *= PRIME_1;
 | 
					 | 
				
			||||||
        hash ^= k;
 | 
					 | 
				
			||||||
        return Long.rotateLeft(hash, 27) * PRIME_1 + PRIME_4;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    private static long mix(long hash) {
 | 
					 | 
				
			||||||
        hash ^= hash >>> 33;
 | 
					 | 
				
			||||||
        hash *= PRIME_2;
 | 
					 | 
				
			||||||
        hash ^= hash >>> 29;
 | 
					 | 
				
			||||||
        hash *= PRIME_3;
 | 
					 | 
				
			||||||
        hash ^= hash >>> 32;
 | 
					 | 
				
			||||||
        return hash;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    static class Page {
 | 
					    static class Page {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        static final int PAGE_SIZE = 1024;
 | 
					        static final int PAGE_SIZE = 1024;
 | 
				
			||||||
@@ -311,16 +294,12 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
                ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN);
 | 
					                ByteBuffer key = slice.slice(offset, nextSeparator - offset).order(ByteOrder.LITTLE_ENDIAN);
 | 
				
			||||||
                // find the global dictionary code to aggregate,
 | 
					                // find the global dictionary code to aggregate,
 | 
				
			||||||
                // making this code global allows easy merging
 | 
					                // making this code global allows easy merging
 | 
				
			||||||
                int dictId = dictionary.encode(hash(key), key);
 | 
					                int dictId = dictionary.encode(key.hashCode(), key);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                offset = nextSeparator + 1;
 | 
					                offset = nextSeparator + 1;
 | 
				
			||||||
                int newLine = findIndexOf(slice, offset, NEW_LINE);
 | 
					                int newLine = findIndexOf(slice, offset, NEW_LINE);
 | 
				
			||||||
                // parse the double
 | 
					                // parse the double
 | 
				
			||||||
                // todo do this without allocating a string, could use a fast parsing falgorithm
 | 
					                double d = parseTemperature(slice.slice(offset, newLine - offset));
 | 
				
			||||||
                var bytes = new byte[newLine - offset];
 | 
					 | 
				
			||||||
                slice.get(offset, bytes);
 | 
					 | 
				
			||||||
                var string = new String(bytes, StandardCharsets.US_ASCII);
 | 
					 | 
				
			||||||
                double d = Double.parseDouble(string);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                Page.update(pages, dictId, d);
 | 
					                Page.update(pages, dictId, d);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -351,7 +330,7 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
        protected double[][] compute() {
 | 
					        protected double[][] compute() {
 | 
				
			||||||
            if (min == max) {
 | 
					            if (min == max) {
 | 
				
			||||||
                // fixme - hardcoded to problem size
 | 
					                // fixme - hardcoded to problem size
 | 
				
			||||||
                var pages = new double[1024][];
 | 
					                var pages = new double[600][];
 | 
				
			||||||
                var slice = slices.get(min);
 | 
					                var slice = slices.get(min);
 | 
				
			||||||
                computeSlice(slice, pages);
 | 
					                computeSlice(slice, pages);
 | 
				
			||||||
                return pages;
 | 
					                return pages;
 | 
				
			||||||
@@ -368,7 +347,7 @@ public class CalculateAverage_richardstartin {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    public static void main(String[] args) throws IOException {
 | 
					    public static void main(String[] args) throws IOException {
 | 
				
			||||||
        int maxChunkSize = 10 << 20; // 10MiB
 | 
					        int maxChunkSize = 250 << 20; // 250MiB
 | 
				
			||||||
        try (var raf = new RandomAccessFile(FILE, "r");
 | 
					        try (var raf = new RandomAccessFile(FILE, "r");
 | 
				
			||||||
             var channel = raf.getChannel()) {
 | 
					             var channel = raf.getChannel()) {
 | 
				
			||||||
            long size = channel.size();
 | 
					            long size = channel.size();
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user