jerrinot's improvement (#514)
* refactoring * segregated heap for names also a different hashing function. turns out hashing just first word is good enough
This commit is contained in:
parent
e1a0b79259
commit
114ba76d20
@ -38,7 +38,7 @@ public class CalculateAverage_jerrinot {
|
|||||||
// todo: with hyper-threading enable we would be better of with availableProcessors / 2;
|
// todo: with hyper-threading enable we would be better of with availableProcessors / 2;
|
||||||
// todo: validate the testing env. params.
|
// todo: validate the testing env. params.
|
||||||
private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
|
private static final int THREAD_COUNT = Runtime.getRuntime().availableProcessors();
|
||||||
// private static final int THREAD_COUNT = 4;
|
// private static final int THREAD_COUNT = 1;
|
||||||
|
|
||||||
private static final long SEPARATOR_PATTERN = 0x3B3B3B3B3B3B3B3BL;
|
private static final long SEPARATOR_PATTERN = 0x3B3B3B3B3B3B3B3BL;
|
||||||
|
|
||||||
@ -153,8 +153,9 @@ public class CalculateAverage_jerrinot {
|
|||||||
}
|
}
|
||||||
|
|
||||||
private static class Processor implements Runnable {
|
private static class Processor implements Runnable {
|
||||||
private static final int MAP_SLOT_COUNT = ceilPow2(10000);
|
private static final int MAX_UNIQUE_KEYS = 10000;
|
||||||
private static final int STATION_MAX_NAME_BYTES = 120;
|
private static final int MAP_SLOT_COUNT = ceilPow2(MAX_UNIQUE_KEYS);
|
||||||
|
private static final int STATION_MAX_NAME_BYTES = 104;
|
||||||
|
|
||||||
private static final long COUNT_OFFSET = 0;
|
private static final long COUNT_OFFSET = 0;
|
||||||
private static final long MIN_OFFSET = 4;
|
private static final long MIN_OFFSET = 4;
|
||||||
@ -163,20 +164,20 @@ public class CalculateAverage_jerrinot {
|
|||||||
private static final long LEN_OFFSET = 20;
|
private static final long LEN_OFFSET = 20;
|
||||||
private static final long NAME_OFFSET = 24;
|
private static final long NAME_OFFSET = 24;
|
||||||
|
|
||||||
private static final int MAP_ENTRY_SIZE_BYTES = +Integer.BYTES // count // 0
|
private static final int MAP_ENTRY_SIZE_BYTES = Integer.BYTES // count // 0
|
||||||
+ Integer.BYTES // min // +4
|
+ Integer.BYTES // min // +4
|
||||||
+ Integer.BYTES // max // +8
|
+ Integer.BYTES // max // +8
|
||||||
+ Long.BYTES // sum // +12
|
+ Long.BYTES // sum // +12
|
||||||
+ Integer.BYTES // station name len // +20
|
+ Integer.BYTES // station name len // +20
|
||||||
+ STATION_MAX_NAME_BYTES; // +24
|
+ Long.BYTES; // station name ptr // 24
|
||||||
|
|
||||||
private static final int MAP_SIZE_BYTES = MAP_SLOT_COUNT * MAP_ENTRY_SIZE_BYTES;
|
private static final int MAP_SIZE_BYTES = MAP_SLOT_COUNT * MAP_ENTRY_SIZE_BYTES;
|
||||||
|
private static final int MAP_NAMES_BYTES = MAX_UNIQUE_KEYS * STATION_MAX_NAME_BYTES;
|
||||||
private static final long MAP_MASK = MAP_SLOT_COUNT - 1;
|
private static final long MAP_MASK = MAP_SLOT_COUNT - 1;
|
||||||
|
|
||||||
// todo: some fields could probably be converted to locals
|
|
||||||
|
|
||||||
private final long map;
|
private final long map;
|
||||||
|
private long currentNamesPtr;
|
||||||
|
private final long namesHi;
|
||||||
private long cursorA;
|
private long cursorA;
|
||||||
private long endA;
|
private long endA;
|
||||||
private long cursorB;
|
private long cursorB;
|
||||||
@ -240,7 +241,7 @@ public class CalculateAverage_jerrinot {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
byte[] nameArr = new byte[(int) len];
|
byte[] nameArr = new byte[(int) len];
|
||||||
long baseNameAddr = baseAddress + NAME_OFFSET;
|
long baseNameAddr = UNSAFE.getLong(baseAddress + NAME_OFFSET);
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
nameArr[i] = UNSAFE.getByte(baseNameAddr + i);
|
nameArr[i] = UNSAFE.getByte(baseNameAddr + i);
|
||||||
}
|
}
|
||||||
@ -270,6 +271,8 @@ public class CalculateAverage_jerrinot {
|
|||||||
this.endC = endC;
|
this.endC = endC;
|
||||||
this.endD = endD;
|
this.endD = endD;
|
||||||
this.map = UNSAFE.allocateMemory(MAP_SIZE_BYTES);
|
this.map = UNSAFE.allocateMemory(MAP_SIZE_BYTES);
|
||||||
|
this.currentNamesPtr = UNSAFE.allocateMemory(MAP_NAMES_BYTES);
|
||||||
|
this.namesHi = currentNamesPtr + MAP_NAMES_BYTES;
|
||||||
|
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < MAP_SIZE_BYTES; i += 8) {
|
for (i = 0; i < MAP_SIZE_BYTES; i += 8) {
|
||||||
@ -278,6 +281,7 @@ public class CalculateAverage_jerrinot {
|
|||||||
for (i = i - 8; i < MAP_SIZE_BYTES; i++) {
|
for (i = i - 8; i < MAP_SIZE_BYTES; i++) {
|
||||||
UNSAFE.putByte(map + i, (byte) 0);
|
UNSAFE.putByte(map + i, (byte) 0);
|
||||||
}
|
}
|
||||||
|
UNSAFE.setMemory(currentNamesPtr, MAP_NAMES_BYTES, (byte) 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doTail() {
|
private void doTail() {
|
||||||
@ -293,58 +297,56 @@ public class CalculateAverage_jerrinot {
|
|||||||
// System.out.println("done D");
|
// System.out.println("done D");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doOne(long cursorA, long endA) {
|
private void doOne(long cursor, long endA) {
|
||||||
while (cursorA < endA) {
|
while (cursor < endA) {
|
||||||
long startA = cursorA;
|
long start = cursor;
|
||||||
long delimiterWordA = UNSAFE.getLong(cursorA);
|
long currentWord = UNSAFE.getLong(cursor);
|
||||||
long hashA = 0;
|
long mask = getDelimiterMask(currentWord);
|
||||||
long maskA = getDelimiterMask(delimiterWordA);
|
long maskedFirstWord = currentWord & ((mask - 1) ^ mask) >>> 8;
|
||||||
while (maskA == 0) {
|
long hash = hash(maskedFirstWord);
|
||||||
hashA ^= delimiterWordA;
|
while (mask == 0) {
|
||||||
cursorA += 8;
|
cursor += 8;
|
||||||
delimiterWordA = UNSAFE.getLong(cursorA);
|
currentWord = UNSAFE.getLong(cursor);
|
||||||
maskA = getDelimiterMask(delimiterWordA);
|
mask = getDelimiterMask(currentWord);
|
||||||
}
|
}
|
||||||
final int delimiterByteA = Long.numberOfTrailingZeros(maskA);
|
final int delimiterByte = Long.numberOfTrailingZeros(mask);
|
||||||
final long semicolonA = cursorA + (delimiterByteA >> 3);
|
final long semicolon = cursor + (delimiterByte >> 3);
|
||||||
final long maskedWordA = delimiterWordA & ((maskA - 1) ^ maskA) >>> 8;
|
final long maskedWord = currentWord & ((mask - 1) ^ mask) >>> 8;
|
||||||
hashA ^= maskedWordA;
|
long baseEntryPtr = getOrCreateEntryBaseOffset(semicolon, start, (int) hash, maskedWord);
|
||||||
int intHashA = (int) (hashA ^ (hashA >> 32));
|
long temperatureWord = UNSAFE.getLong(semicolon + 1);
|
||||||
intHashA = intHashA ^ (intHashA >> 17);
|
cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord);
|
||||||
|
|
||||||
long baseEntryPtrA = getOrCreateEntryBaseOffset(semicolonA, startA, intHashA, maskedWordA);
|
|
||||||
long temperatureWordA = UNSAFE.getLong(semicolonA + 1);
|
|
||||||
cursorA = parseAndStoreTemperature(semicolonA + 1, baseEntryPtrA, temperatureWordA);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static long hash(long word1) {
|
||||||
|
// credit: mtopolnik
|
||||||
|
long seed = 0x51_7c_c1_b7_27_22_0a_95L;
|
||||||
|
int rotDist = 17;
|
||||||
|
|
||||||
|
long hash = word1;
|
||||||
|
hash *= seed;
|
||||||
|
hash = Long.rotateLeft(hash, rotDist);
|
||||||
|
return hash;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
while (cursorA < endA && cursorB < endB && cursorC < endC && cursorD < endD) {
|
while (cursorA < endA && cursorB < endB && cursorC < endC && cursorD < endD) {
|
||||||
// todo: experiment with different inter-leaving
|
|
||||||
long startA = cursorA;
|
long startA = cursorA;
|
||||||
long startB = cursorB;
|
long startB = cursorB;
|
||||||
long startC = cursorC;
|
long startC = cursorC;
|
||||||
long startD = cursorD;
|
long startD = cursorD;
|
||||||
|
|
||||||
long currentWordA = UNSAFE.getLong(startA);
|
long currentWordA = UNSAFE.getLong(startA);
|
||||||
// long delimiterWordA2 = UNSAFE.getLong(startA + 8);
|
|
||||||
long currentWordB = UNSAFE.getLong(startB);
|
long currentWordB = UNSAFE.getLong(startB);
|
||||||
// long delimiterWordB2 = UNSAFE.getLong(startB + 8);
|
|
||||||
long currentWordC = UNSAFE.getLong(startC);
|
long currentWordC = UNSAFE.getLong(startC);
|
||||||
// long delimiterWordCa = UNSAFE.getLong(startC + 8);
|
|
||||||
long currentWordD = UNSAFE.getLong(startD);
|
long currentWordD = UNSAFE.getLong(startD);
|
||||||
// long delimiterWordD2 = UNSAFE.getLong(startD + 8);
|
|
||||||
|
|
||||||
long hashA = 0;
|
// credits for the hashing idea: mtopolnik
|
||||||
long hashB = 0;
|
|
||||||
long hashC = 0;
|
|
||||||
long hashD = 0;
|
|
||||||
|
|
||||||
// credits for the hashing idea: royvanrijn
|
|
||||||
long maskA = getDelimiterMask(currentWordA);
|
long maskA = getDelimiterMask(currentWordA);
|
||||||
|
long maskedFirstWordA = currentWordA & ((maskA - 1) ^ maskA) >>> 8;
|
||||||
|
long hashA = hash(maskedFirstWordA);
|
||||||
while (maskA == 0) {
|
while (maskA == 0) {
|
||||||
hashA ^= currentWordA;
|
|
||||||
cursorA += 8;
|
cursorA += 8;
|
||||||
currentWordA = UNSAFE.getLong(cursorA);
|
currentWordA = UNSAFE.getLong(cursorA);
|
||||||
maskA = getDelimiterMask(currentWordA);
|
maskA = getDelimiterMask(currentWordA);
|
||||||
@ -353,13 +355,11 @@ public class CalculateAverage_jerrinot {
|
|||||||
final long semicolonA = cursorA + (delimiterByteA >> 3);
|
final long semicolonA = cursorA + (delimiterByteA >> 3);
|
||||||
long temperatureWordA = UNSAFE.getLong(semicolonA + 1);
|
long temperatureWordA = UNSAFE.getLong(semicolonA + 1);
|
||||||
final long maskedWordA = currentWordA & ((maskA - 1) ^ maskA) >>> 8;
|
final long maskedWordA = currentWordA & ((maskA - 1) ^ maskA) >>> 8;
|
||||||
hashA ^= maskedWordA;
|
|
||||||
int intHashA = (int) (hashA ^ (hashA >> 32));
|
|
||||||
intHashA = intHashA ^ (intHashA >> 17);
|
|
||||||
|
|
||||||
long maskB = getDelimiterMask(currentWordB);
|
long maskB = getDelimiterMask(currentWordB);
|
||||||
|
long maskedFirstWordB = currentWordB & ((maskB - 1) ^ maskB) >>> 8;
|
||||||
|
long hashB = hash(maskedFirstWordB);
|
||||||
while (maskB == 0) {
|
while (maskB == 0) {
|
||||||
hashB ^= currentWordB;
|
|
||||||
cursorB += 8;
|
cursorB += 8;
|
||||||
currentWordB = UNSAFE.getLong(cursorB);
|
currentWordB = UNSAFE.getLong(cursorB);
|
||||||
maskB = getDelimiterMask(currentWordB);
|
maskB = getDelimiterMask(currentWordB);
|
||||||
@ -368,13 +368,11 @@ public class CalculateAverage_jerrinot {
|
|||||||
final long semicolonB = cursorB + (delimiterByteB >> 3);
|
final long semicolonB = cursorB + (delimiterByteB >> 3);
|
||||||
long temperatureWordB = UNSAFE.getLong(semicolonB + 1);
|
long temperatureWordB = UNSAFE.getLong(semicolonB + 1);
|
||||||
final long maskedWordB = currentWordB & ((maskB - 1) ^ maskB) >>> 8;
|
final long maskedWordB = currentWordB & ((maskB - 1) ^ maskB) >>> 8;
|
||||||
hashB ^= maskedWordB;
|
|
||||||
int intHashB = (int) (hashB ^ (hashB >> 32));
|
|
||||||
intHashB = intHashB ^ (intHashB >> 17);
|
|
||||||
|
|
||||||
long maskC = getDelimiterMask(currentWordC);
|
long maskC = getDelimiterMask(currentWordC);
|
||||||
|
long maskedFirstWordC = currentWordC & ((maskC - 1) ^ maskC) >>> 8;
|
||||||
|
long hashC = hash(maskedFirstWordC);
|
||||||
while (maskC == 0) {
|
while (maskC == 0) {
|
||||||
hashC ^= currentWordC;
|
|
||||||
cursorC += 8;
|
cursorC += 8;
|
||||||
currentWordC = UNSAFE.getLong(cursorC);
|
currentWordC = UNSAFE.getLong(cursorC);
|
||||||
maskC = getDelimiterMask(currentWordC);
|
maskC = getDelimiterMask(currentWordC);
|
||||||
@ -383,13 +381,11 @@ public class CalculateAverage_jerrinot {
|
|||||||
final long semicolonC = cursorC + (delimiterByteC >> 3);
|
final long semicolonC = cursorC + (delimiterByteC >> 3);
|
||||||
long temperatureWordC = UNSAFE.getLong(semicolonC + 1);
|
long temperatureWordC = UNSAFE.getLong(semicolonC + 1);
|
||||||
final long maskedWordC = currentWordC & ((maskC - 1) ^ maskC) >>> 8;
|
final long maskedWordC = currentWordC & ((maskC - 1) ^ maskC) >>> 8;
|
||||||
hashC ^= maskedWordC;
|
|
||||||
int intHashC = (int) (hashC ^ (hashC >> 32));
|
|
||||||
intHashC = intHashC ^ (intHashC >> 17);
|
|
||||||
|
|
||||||
long maskD = getDelimiterMask(currentWordD);
|
long maskD = getDelimiterMask(currentWordD);
|
||||||
|
long maskedFirstWordD = currentWordD & ((maskD - 1) ^ maskD) >>> 8;
|
||||||
|
long hashD = hash(maskedFirstWordD);
|
||||||
while (maskD == 0) {
|
while (maskD == 0) {
|
||||||
hashD ^= currentWordD;
|
|
||||||
cursorD += 8;
|
cursorD += 8;
|
||||||
currentWordD = UNSAFE.getLong(cursorD);
|
currentWordD = UNSAFE.getLong(cursorD);
|
||||||
maskD = getDelimiterMask(currentWordD);
|
maskD = getDelimiterMask(currentWordD);
|
||||||
@ -398,14 +394,11 @@ public class CalculateAverage_jerrinot {
|
|||||||
final long semicolonD = cursorD + (delimiterByteD >> 3);
|
final long semicolonD = cursorD + (delimiterByteD >> 3);
|
||||||
long temperatureWordD = UNSAFE.getLong(semicolonD + 1);
|
long temperatureWordD = UNSAFE.getLong(semicolonD + 1);
|
||||||
final long maskedWordD = currentWordD & ((maskD - 1) ^ maskD) >>> 8;
|
final long maskedWordD = currentWordD & ((maskD - 1) ^ maskD) >>> 8;
|
||||||
hashD ^= maskedWordD;
|
|
||||||
int intHashD = (int) (hashD ^ (hashD >> 32));
|
|
||||||
intHashD = intHashD ^ (intHashD >> 17);
|
|
||||||
|
|
||||||
long baseEntryPtrA = getOrCreateEntryBaseOffset(semicolonA, startA, intHashA, maskedWordA);
|
long baseEntryPtrA = getOrCreateEntryBaseOffset(semicolonA, startA, (int) hashA, maskedWordA);
|
||||||
long baseEntryPtrB = getOrCreateEntryBaseOffset(semicolonB, startB, intHashB, maskedWordB);
|
long baseEntryPtrB = getOrCreateEntryBaseOffset(semicolonB, startB, (int) hashB, maskedWordB);
|
||||||
long baseEntryPtrC = getOrCreateEntryBaseOffset(semicolonC, startC, intHashC, maskedWordC);
|
long baseEntryPtrC = getOrCreateEntryBaseOffset(semicolonC, startC, (int) hashC, maskedWordC);
|
||||||
long baseEntryPtrD = getOrCreateEntryBaseOffset(semicolonD, startD, intHashD, maskedWordD);
|
long baseEntryPtrD = getOrCreateEntryBaseOffset(semicolonD, startD, (int) hashD, maskedWordD);
|
||||||
|
|
||||||
cursorA = parseAndStoreTemperature(semicolonA + 1, baseEntryPtrA, temperatureWordA);
|
cursorA = parseAndStoreTemperature(semicolonA + 1, baseEntryPtrA, temperatureWordA);
|
||||||
cursorB = parseAndStoreTemperature(semicolonB + 1, baseEntryPtrB, temperatureWordB);
|
cursorB = parseAndStoreTemperature(semicolonB + 1, baseEntryPtrB, temperatureWordB);
|
||||||
@ -415,52 +408,42 @@ public class CalculateAverage_jerrinot {
|
|||||||
doTail();
|
doTail();
|
||||||
}
|
}
|
||||||
|
|
||||||
private long getOrCreateEntryBaseOffset(long semicolonA, long startA, int intHashA, long maskedWordA) {
|
private long getOrCreateEntryBaseOffset(long semicolonPtr, long startPtr, int hash, long maskedWord) {
|
||||||
// hashSet.add(intHashA);
|
long lenLong = semicolonPtr - startPtr;
|
||||||
long lenLong = semicolonA - startA;
|
|
||||||
int lenA = (int) lenLong;
|
int lenA = (int) lenLong;
|
||||||
|
|
||||||
// assert lenA != 0;
|
long mapIndexA = hash & MAP_MASK;
|
||||||
// byte[] nameArr = new byte[lenA];
|
|
||||||
// for (int i = 0; i < lenA; i++) {
|
|
||||||
// nameArr[i] = UNSAFE.getByte(startA + i);
|
|
||||||
// }
|
|
||||||
// String nameStr = new String(nameArr);
|
|
||||||
// Integer oldHash = nameToHash.put(nameStr, intHashA);
|
|
||||||
// assert oldHash == null || oldHash == intHashA : "name: " + nameStr + ", old hash = " + oldHash + ", new hash = " + intHashA;
|
|
||||||
|
|
||||||
long mapIndexA = intHashA & MAP_MASK;
|
|
||||||
// long clusterLen = 0;
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
long basePtr = mapIndexA * MAP_ENTRY_SIZE_BYTES + map;
|
long basePtr = mapIndexA * MAP_ENTRY_SIZE_BYTES + map;
|
||||||
long lenPtr = basePtr + LEN_OFFSET;
|
long lenPtr = basePtr + LEN_OFFSET;
|
||||||
|
long namePtr = basePtr + NAME_OFFSET;
|
||||||
int len = UNSAFE.getInt(lenPtr);
|
int len = UNSAFE.getInt(lenPtr);
|
||||||
if (len == lenA) {
|
if (len == lenA) {
|
||||||
if (nameMatch(startA, maskedWordA, basePtr, lenLong)) {
|
namePtr = UNSAFE.getLong(basePtr + NAME_OFFSET);
|
||||||
// if (clusterLen > maxClusterLen) {
|
if (nameMatch(startPtr, maskedWord, namePtr, lenLong)) {
|
||||||
// maxClusterLen = clusterLen;
|
|
||||||
// System.out.println("max cluster len: " + clusterLen);
|
|
||||||
// }
|
|
||||||
return basePtr;
|
return basePtr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (len == 0) {
|
else if (len == 0) {
|
||||||
// todo: uncommon branch maybe?
|
// todo: uncommon branch maybe?
|
||||||
// empty slot
|
// empty slot
|
||||||
UNSAFE.copyMemory(semicolonA - lenA, basePtr + NAME_OFFSET, lenA);
|
UNSAFE.putLong(namePtr, currentNamesPtr);
|
||||||
UNSAFE.putInt(lenPtr, lenA);
|
UNSAFE.putInt(lenPtr, lenA);
|
||||||
// todo: this could be a single putLong()
|
// todo: this could be a single putLong()
|
||||||
UNSAFE.putInt(basePtr + MAX_OFFSET, Integer.MIN_VALUE);
|
UNSAFE.putInt(basePtr + MAX_OFFSET, Integer.MIN_VALUE);
|
||||||
UNSAFE.putInt(basePtr + MIN_OFFSET, Integer.MAX_VALUE);
|
UNSAFE.putInt(basePtr + MIN_OFFSET, Integer.MAX_VALUE);
|
||||||
|
UNSAFE.copyMemory(startPtr, currentNamesPtr, lenA);
|
||||||
|
long consume = (lenLong & ~7L) + 8;
|
||||||
|
currentNamesPtr += consume;
|
||||||
|
assert currentNamesPtr <= namesHi;
|
||||||
return basePtr;
|
return basePtr;
|
||||||
}
|
}
|
||||||
mapIndexA = ++mapIndexA & MAP_MASK;
|
mapIndexA = ++mapIndexA & MAP_MASK;
|
||||||
// clusterLen++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean nameMatch(long startA, long maskedWordA, long basePtr, long len) {
|
private static boolean nameMatch(long startA, long maskedWordA, long namePtr, long len) {
|
||||||
long namePtr = basePtr + NAME_OFFSET;
|
// long namePtr = basePtr + NAME_OFFSET;
|
||||||
long fullLen = len & ~7L;
|
long fullLen = len & ~7L;
|
||||||
long offset;
|
long offset;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user