nobody should try this at home (#709)

This commit is contained in:
Jaromir Hamala 2024-02-01 11:01:18 +01:00 committed by GitHub
parent 241d42ca66
commit 9e2199a5d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -344,7 +344,7 @@ public class CalculateAverage_jerrinot {
} }
} }
private void doOne(long cursor, long end) { private void doOne(long cursor, long end, long fastMap) {
while (cursor < end) { while (cursor < end) {
// it seems that when pulling just from a single chunk // it seems that when pulling just from a single chunk
// then bit-twiddling is faster than lookup tables // then bit-twiddling is faster than lookup tables
@ -361,6 +361,7 @@ public class CalculateAverage_jerrinot {
long maskedFirstWord = currentWord & firstWordMask; long maskedFirstWord = currentWord & firstWordMask;
int hash = hash(maskedFirstWord); int hash = hash(maskedFirstWord);
int mapIndex = hash & MAP_MASK;
while (mask == 0) { while (mask == 0) {
cursor += 8; cursor += 8;
currentWord = UNSAFE.getLong(cursor); currentWord = UNSAFE.getLong(cursor);
@ -371,9 +372,16 @@ public class CalculateAverage_jerrinot {
final long maskedWord = currentWord & ((mask - 1) ^ mask) >>> 8; final long maskedWord = currentWord & ((mask - 1) ^ mask) >>> 8;
int len = (int) (semicolon - start); int len = (int) (semicolon - start);
long baseEntryPtr = getOrCreateEntryBaseOffsetSlow(len, start, hash, maskedWord); if (len > 15) {
long temperatureWord = UNSAFE.getLong(semicolon + 1); long baseEntryPtr = getOrCreateEntryBaseOffsetSlow(len, start, hash, maskedWord);
cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord); long temperatureWord = UNSAFE.getLong(semicolon + 1);
cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord);
}
else {
long baseEntryPtr = getOrCreateEntryBaseOffsetFast(mapIndex, len, maskedWord, maskedFirstWord, fastMap);
long temperatureWord = UNSAFE.getLong(semicolon + 1);
cursor = parseAndStoreTemperature(semicolon + 1, baseEntryPtr, temperatureWord);
}
} }
} }
@ -415,8 +423,8 @@ public class CalculateAverage_jerrinot {
} }
setCursors(startingPtr); setCursors(startingPtr);
mainLoop(fastMap); mainLoop(fastMap);
doOne(cursorA, endA); doOne(cursorA, endA, fastMap);
doOne(cursorB, endB); doOne(cursorB, endB, fastMap);
} }
transferToHeap(fastMap); transferToHeap(fastMap);
} }
@ -454,20 +462,25 @@ public class CalculateAverage_jerrinot {
long wordMaskA = HASH_MASKS[trailingZerosA]; long wordMaskA = HASH_MASKS[trailingZerosA];
long wordMaskB = HASH_MASKS[trailingZerosB]; long wordMaskB = HASH_MASKS[trailingZerosB];
long maskedMaskA = advanceMaskA & 8;
long maskedMaskB = advanceMaskB & 8;
long negAdvanceMaskA = ~advanceMaskA; long negAdvanceMaskA = ~advanceMaskA;
long negAdvanceMaskB = ~advanceMaskB; long negAdvanceMaskB = ~advanceMaskB;
cursorA += advanceMaskA & 8; cursorA += maskedMaskA;
cursorB += advanceMaskB & 8; cursorB += maskedMaskB;
long nextWordA = (advanceMaskA & candidateWordA) | (negAdvanceMaskA & currentWordA); long nextWordA = (advanceMaskA & candidateWordA) | (negAdvanceMaskA & currentWordA);
long nextWordB = (advanceMaskB & candidateWordB) | (negAdvanceMaskB & currentWordB); long nextWordB = (advanceMaskB & candidateWordB) | (negAdvanceMaskB & currentWordB);
long nextDelimiterMaskA = getDelimiterMask(nextWordA); delimiterMaskA = getDelimiterMask(nextWordA);
long nextDelimiterMaskB = getDelimiterMask(nextWordB); delimiterMaskB = getDelimiterMask(nextWordB);
boolean slowA = nextDelimiterMaskA == 0; boolean slowA = delimiterMaskA == 0;
boolean slowB = nextDelimiterMaskB == 0; boolean slowB = delimiterMaskB == 0;
trailingZerosA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3;
trailingZerosB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3;
boolean slowSome = (slowA || slowB); boolean slowSome = (slowA || slowB);
long maskedFirstWordA = wordMaskA & currentWordA; long maskedFirstWordA = wordMaskA & currentWordA;
@ -479,75 +492,103 @@ public class CalculateAverage_jerrinot {
currentWordA = nextWordA; currentWordA = nextWordA;
currentWordB = nextWordB; currentWordB = nextWordB;
delimiterMaskA = nextDelimiterMaskA;
delimiterMaskB = nextDelimiterMaskB;
if (slowSome) { if (slowSome) {
while (delimiterMaskA == 0) { doSlow(fastMap, delimiterMaskA, currentWordA, delimiterMaskB, currentWordB, startA, startB, hashA, hashB, slowA, maskedFirstWordA, slowB,
cursorA += 8; maskedFirstWordB);
currentWordA = UNSAFE.getLong(cursorA);
delimiterMaskA = getDelimiterMask(currentWordA);
}
while (delimiterMaskB == 0) {
cursorB += 8;
currentWordB = UNSAFE.getLong(cursorB);
delimiterMaskB = getDelimiterMask(currentWordB);
}
}
trailingZerosA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3;
trailingZerosB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3;
final long semicolonA = cursorA + trailingZerosA;
final long semicolonB = cursorB + trailingZerosB;
long digitStartA = semicolonA + 1;
long digitStartB = semicolonB + 1;
long lastWordMaskA = HASH_MASKS[trailingZerosA];
long lastWordMaskB = HASH_MASKS[trailingZerosB];
long temperatureWordA = UNSAFE.getLong(digitStartA);
long temperatureWordB = UNSAFE.getLong(digitStartB);
final long maskedLastWordA = currentWordA & lastWordMaskA;
final long maskedLastWordB = currentWordB & lastWordMaskB;
int lenA = (int) (semicolonA - startA);
int lenB = (int) (semicolonB - startB);
int mapIndexA = hashA & MAP_MASK;
int mapIndexB = hashB & MAP_MASK;
long baseEntryPtrA;
long baseEntryPtrB;
if (slowSome) {
if (slowA) {
baseEntryPtrA = getOrCreateEntryBaseOffsetSlow(lenA, startA, hashA, maskedLastWordA);
}
else {
baseEntryPtrA = getOrCreateEntryBaseOffsetFast(mapIndexA, lenA, maskedLastWordA, maskedFirstWordA, fastMap);
}
if (slowB) {
baseEntryPtrB = getOrCreateEntryBaseOffsetSlow(lenB, startB, hashB, maskedLastWordB);
}
else {
baseEntryPtrB = getOrCreateEntryBaseOffsetFast(mapIndexB, lenB, maskedLastWordB, maskedFirstWordB, fastMap);
}
} }
else { else {
final long semicolonA = cursorA + trailingZerosA;
final long semicolonB = cursorB + trailingZerosB;
long digitStartA = semicolonA + 1;
long digitStartB = semicolonB + 1;
long lastWordMaskA = HASH_MASKS[trailingZerosA];
long lastWordMaskB = HASH_MASKS[trailingZerosB];
long temperatureWordA = UNSAFE.getLong(digitStartA);
long temperatureWordB = UNSAFE.getLong(digitStartB);
final long maskedLastWordA = currentWordA & lastWordMaskA;
final long maskedLastWordB = currentWordB & lastWordMaskB;
int lenA = (int) (semicolonA - startA);
int lenB = (int) (semicolonB - startB);
int mapIndexA = hashA & MAP_MASK;
int mapIndexB = hashB & MAP_MASK;
long baseEntryPtrA;
long baseEntryPtrB;
baseEntryPtrA = getOrCreateEntryBaseOffsetFast(mapIndexA, lenA, maskedLastWordA, maskedFirstWordA, fastMap); baseEntryPtrA = getOrCreateEntryBaseOffsetFast(mapIndexA, lenA, maskedLastWordA, maskedFirstWordA, fastMap);
baseEntryPtrB = getOrCreateEntryBaseOffsetFast(mapIndexB, lenB, maskedLastWordB, maskedFirstWordB, fastMap); baseEntryPtrB = getOrCreateEntryBaseOffsetFast(mapIndexB, lenB, maskedLastWordB, maskedFirstWordB, fastMap);
}
cursorA = parseAndStoreTemperature(digitStartA, baseEntryPtrA, temperatureWordA); cursorA = parseAndStoreTemperature(digitStartA, baseEntryPtrA, temperatureWordA);
cursorB = parseAndStoreTemperature(digitStartB, baseEntryPtrB, temperatureWordB); cursorB = parseAndStoreTemperature(digitStartB, baseEntryPtrB, temperatureWordB);
}
} }
} }
/**
 * Processes one record from each of the two streams (A and B) on the path taken when at
 * least one of them had no delimiter in the word the caller had already loaded.
 *
 * <p>For each stream the method keeps advancing the corresponding cursor member
 * ({@code cursorA} / {@code cursorB}) by 8 bytes and reloading a word until
 * {@code getDelimiterMask} reports a non-zero mask. It then derives the delimiter position,
 * loads the 8 bytes that follow it, resolves the map entry (via the slow lookup when the
 * matching {@code slowX} flag is set, otherwise via the fast lookup) and finally stores the
 * value returned by {@code parseAndStoreTemperature} back into the cursor member.
 *
 * @param fastMap          handle passed through to the fast entry lookup
 * @param delimiterMaskA   delimiter mask of {@code currentWordA}; zero means "keep scanning"
 * @param currentWordA     word most recently loaded for stream A
 * @param delimiterMaskB   delimiter mask of {@code currentWordB}; zero means "keep scanning"
 * @param currentWordB     word most recently loaded for stream B
 * @param startA           address where the current record of stream A begins
 * @param startB           address where the current record of stream B begins
 * @param hashA            hash computed by the caller for stream A
 * @param hashB            hash computed by the caller for stream B
 * @param slowA            whether stream A must use the slow entry lookup
 * @param maskedFirstWordA masked first word of stream A, used by the fast lookup only
 * @param slowB            whether stream B must use the slow entry lookup
 * @param maskedFirstWordB masked first word of stream B, used by the fast lookup only
 */
private void doSlow(long fastMap, long delimiterMaskA, long currentWordA, long delimiterMaskB, long currentWordB, long startA, long startB, int hashA, int hashB,
                    boolean slowA, long maskedFirstWordA, boolean slowB, long maskedFirstWordB) {
    // Walk each stream forward one 8-byte word at a time until a delimiter shows up.
    for (; delimiterMaskA == 0; delimiterMaskA = getDelimiterMask(currentWordA)) {
        cursorA += 8;
        currentWordA = UNSAFE.getLong(cursorA);
    }
    for (; delimiterMaskB == 0; delimiterMaskB = getDelimiterMask(currentWordB)) {
        cursorB += 8;
        currentWordB = UNSAFE.getLong(cursorB);
    }

    // Trailing-zero bit count divided by 8: byte index of the delimiter inside the word.
    final int delimiterByteA = Long.numberOfTrailingZeros(delimiterMaskA) >> 3;
    final int delimiterByteB = Long.numberOfTrailingZeros(delimiterMaskB) >> 3;

    final long delimiterPosA = cursorA + delimiterByteA;
    final long delimiterPosB = cursorB + delimiterByteB;
    final long valueStartA = delimiterPosA + 1;
    final long valueStartB = delimiterPosB + 1;

    final long tailMaskA = HASH_MASKS[delimiterByteA];
    final long tailMaskB = HASH_MASKS[delimiterByteB];

    // Load the bytes following each delimiter before touching the map, as the original does.
    final long valueWordA = UNSAFE.getLong(valueStartA);
    final long valueWordB = UNSAFE.getLong(valueStartB);

    final long keyTailA = currentWordA & tailMaskA;
    final long keyTailB = currentWordB & tailMaskB;
    final int keyLenA = (int) (delimiterPosA - startA);
    final int keyLenB = (int) (delimiterPosB - startB);

    // Stream A is always resolved before stream B; each picks its own lookup flavour.
    final long entryA = slowA
            ? getOrCreateEntryBaseOffsetSlow(keyLenA, startA, hashA, keyTailA)
            : getOrCreateEntryBaseOffsetFast(hashA & MAP_MASK, keyLenA, keyTailA, maskedFirstWordA, fastMap);
    final long entryB = slowB
            ? getOrCreateEntryBaseOffsetSlow(keyLenB, startB, hashB, keyTailB)
            : getOrCreateEntryBaseOffsetFast(hashB & MAP_MASK, keyLenB, keyTailB, maskedFirstWordB, fastMap);

    cursorA = parseAndStoreTemperature(valueStartA, entryA, valueWordA);
    cursorB = parseAndStoreTemperature(valueStartB, entryB, valueWordB);
}
private void setCursors(long current) { private void setCursors(long current) {
// Credit for the whole work-stealing scheme: @thomaswue // Credit for the whole work-stealing scheme: @thomaswue
// I have totally stolen it from him. I changed the order a bit to suit my taste better, // I have totally stolen it from him. I changed the order a bit to suit my taste better,