1brc/src/main/java/dev/morling/onebrc/PerfectHashSearch_hundredwatt.java
Jason Nochlin 6181996678
hundredwatt - 2nd submission (#338)
* check full hash before comparing

* implement merykitty suggestions to simplify temperature masking; required refactoring to little-endian

* standalone script for offline Perfect Hash seed searching

* stop using an oversized hash table

---------

Co-authored-by: Jason Nochlin <hundredwatt@users.noreply.github.com>
2024-01-12 09:03:31 +01:00

171 lines
6.4 KiB
Java

/*
* Copyright 2023 The original authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.morling.onebrc;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.random.RandomGenerator;
import java.util.random.RandomGeneratorFactory;
/** Offline script used to find the perfect hash seed for CalculateAverage_hundredwatt. */
public class PerfectHashSearch_hundredwatt {
public static final int DESIRED_SLOTS = 5003;
public static final int N_THREADS = Runtime.getRuntime().availableProcessors() - 1;
public static void main(String[] args) throws IOException, InterruptedException {
AtomicLong magicSeed = new AtomicLong(0);
AtomicLong totalAttempts = new AtomicLong(0);
AtomicLong maxCardinality = new AtomicLong(0);
long start = System.currentTimeMillis();
System.out.println("Searching for perfect hash seed for " + DESIRED_SLOTS + " slots");
// Figure out encoding for all possible temperature values (1999 total)
Map<Long, Short> decodeTemperatureMap = new HashMap<>();
for (short i = -999; i <= 999; i++) {
long word = 0;
int shift = 0;
if (i < 0) {
word |= ((long) '-') << shift;
shift += 8;
}
if (Math.abs(i) >= 100) {
int hh = Math.abs(i) / 100;
int tt = (Math.abs(i) - hh * 100) / 10;
word |= ((long) (hh + '0')) << shift;
shift += 8;
word |= ((long) (tt + '0')) << shift;
}
else {
int tt = Math.abs(i) / 10;
// convert to ascii
word |= ((long) (tt + '0')) << shift;
}
shift += 8;
word |= ((long) '.') << shift;
shift += 8;
int uu = Math.abs(i) % 10;
word |= ((long) (uu + '0')) << shift;
// 31302e3000000000
decodeTemperatureMap.put(word, i);
}
ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);
RandomGeneratorFactory factory = RandomGeneratorFactory.of("L64X256MixRandom");
Runnable search = () -> {
// Brute force to find seed:
// generate a cryptographically secure random seed
RandomGenerator rand;
try {
byte[] seed = new byte[16];
SecureRandom.getInstanceStrong().nextBytes(seed);
rand = factory.create(ByteBuffer.wrap(seed).getLong());
System.out.println(Thread.currentThread().getName() + " | Using seed: " + rand.nextLong());
}
catch (NoSuchAlgorithmException e) {
throw new RuntimeException(e);
}
int max = 0;
int attempts = 0;
while (true) {
BitSet bs = new BitSet(DESIRED_SLOTS);
var seed = rand.nextLong();
seed |= 0b1; // make sure it's odd
for (var word : decodeTemperatureMap.keySet()) {
var h = (word * seed) & ~(1L << 63);
var pos = (int) (h % DESIRED_SLOTS);
bs.set(pos);
}
var c = bs.cardinality();
if (c == decodeTemperatureMap.size()) {
System.out.println("FOUND seed: " + seed + " cardinality: " + c + " max cardinality: " + max);
magicSeed.set(seed);
return;
}
max = Math.max(max, c);
if (attempts % 100_000 == 0) {
if (magicSeed.get() != 0)
return;
int finalMax = max;
long currentMaxCardinality = maxCardinality.updateAndGet(currentMax -> Math.max(currentMax, finalMax));
long currentTotalAttempts = totalAttempts.addAndGet(100_000);
if (Thread.currentThread().getName().endsWith("-1"))
System.out.println(Thread.currentThread().getName() + " | max cardinality: " + currentMaxCardinality + " attempts: "
+ String.format("%,d", currentTotalAttempts));
}
attempts++;
}
};
for (int i = 0; i < Runtime.getRuntime().availableProcessors(); i++) {
executor.submit(search);
}
// Wait for the search to complete
executor.shutdown();
executor.awaitTermination(1, TimeUnit.DAYS);
short[] TEMPERATURES = new short[DESIRED_SLOTS];
long seed = magicSeed.get();
decodeTemperatureMap.entrySet().stream().forEach(e -> {
var word = e.getKey();
var h = (word * seed) & ~(1L << 63);
var pos = (int) (h % DESIRED_SLOTS);
if (TEMPERATURES[pos] != 0)
throw new RuntimeException("collision at " + pos);
TEMPERATURES[pos] = e.getValue();
});
System.out.println("SUCCESS seed: " + seed + " total attempts: " + totalAttempts.get());
try {
File file = new File("seeds.txt");
file.delete();
file.createNewFile();
// Write the seed to seeds.txt
FileWriter myWriter = new FileWriter("seeds.txt");
myWriter.write(Long.toString(seed));
myWriter.write("\n");
myWriter.close();
}
catch (IOException e) {
throw new RuntimeException(e);
}
System.out.println("Search took " + ((System.currentTimeMillis() - start) / 1000) + "s");
}
}