evaluate2.sh: Check output of warmup run and abort early if failed (#333)

* refactor: replace xtrace with "print_and_execute" function

* nit: stylize error messages

* replace out_expected.txt with measurements_1B.out

* print

* prevent errors on cleanup

* run tests and check warmup run output before running benchmark

* move "git diff" pretty diff output to test.sh

* Ensure "set -e" is re-enabled if we followed a "continue" branch

* add timeouts to test.sh invocations

* use diff with tocsv.sh to show differences on failed test

* add --quiet mode to test.sh

* move prepare_$fork.sh invocation to right below hyperfine since test.sh also invokes it

* Revert "add --quiet mode to test.sh"

This reverts commit 13e9fb7f395c1bd64a62528b8349803bc1366941.

* use tee to capture test output to a temp file and print contents on failure

---------

Co-authored-by: Jason Nochlin <hundredwatt@users.noreply.github.com>
This commit is contained in:
Jason Nochlin 2024-01-13 04:19:29 -07:00 committed by GitHub
parent df67791039
commit eff73db9fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 84 additions and 57 deletions

View File

@ -33,9 +33,22 @@ RED='\033[0;31m'
BOLD_YELLOW='\033[1;33m' BOLD_YELLOW='\033[1;33m'
RESET='\033[0m' # No Color RESET='\033[0m' # No Color
MEASUREMENTS_FILE="measurements_1B.txt" # symlinked to measurements.txt before each benchmark run
RUNS=5 # number of timed hyperfine runs per fork
DEFAULT_JAVA_VERSION="21.0.1-open" DEFAULT_JAVA_VERSION="21.0.1-open"
RUN_TIME_LIMIT=300 # seconds RUN_TIME_LIMIT=300 # seconds
# Build the command prefix that caps each benchmark/test invocation at
# $RUN_TIME_LIMIT seconds, so a hung fork cannot stall the whole run.
# Left empty when no timeout tool is available.
TIMEOUT=""
if [ "$(uname -s)" == "Linux" ]; then
    TIMEOUT="timeout -v $RUN_TIME_LIMIT"
elif [ -x "$(command -v gtimeout)" ]; then
    # macOS: gtimeout is provided by `brew install coreutils`
    TIMEOUT="gtimeout -v $RUN_TIME_LIMIT"
else
    # macOS without coreutils: warn and run unbounded
    echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
fi
function check_command_installed { function check_command_installed {
if ! [ -x "$(command -v $1)" ]; then if ! [ -x "$(command -v $1)" ]; then
echo "Error: $1 is not installed." >&2 echo "Error: $1 is not installed." >&2
@ -43,6 +56,11 @@ function check_command_installed {
fi fi
} }
# Print the command (prefixed with "+", like `set -x` tracing) to stderr,
# then execute it, propagating its exit status.
# Tracing goes to stderr so stdout stays clean for captured command output.
print_and_execute() {
  # printf '+ %s\n' "$*" instead of `echo "+ $@"`: mixing a string with $@
  # inside one quoted word is ShellCheck SC2145, and printf is safe for
  # arguments that echo would misinterpret as options (e.g. "-n").
  printf '+ %s\n' "$*" >&2
  "$@"
}
check_command_installed java check_command_installed java
check_command_installed hyperfine check_command_installed hyperfine
check_command_installed jq check_command_installed jq
@ -51,7 +69,7 @@ check_command_installed bc
# Validate that ./calculate_average_<fork>.sh exists for each fork # Validate that ./calculate_average_<fork>.sh exists for each fork
for fork in "$@"; do for fork in "$@"; do
if [ ! -f "./calculate_average_$fork.sh" ]; then if [ ! -f "./calculate_average_$fork.sh" ]; then
echo "Error: ./calculate_average_$fork.sh does not exist." >&2 echo -e "${BOLD_RED}ERROR${RESET}: ./calculate_average_$fork.sh does not exist." >&2
exit 1 exit 1
fi fi
done done
@ -59,7 +77,7 @@ done
## SDKMAN Setup ## SDKMAN Setup
# 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly # 1. Custom check for sdkman installed; not sure why check_command_installed doesn't detect it properly
if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then if [ ! -f "$HOME/.sdkman/bin/sdkman-init.sh" ]; then
echo "Error: sdkman is not installed." >&2 echo -e "${BOLD_RED}ERROR${RESET}: sdkman is not installed." >&2
exit 1 exit 1
fi fi
@ -68,8 +86,7 @@ source "$HOME/.sdkman/bin/sdkman-init.sh"
# 3. make sure the default java version is installed # 3. make sure the default java version is installed
if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then if [ ! -d "$HOME/.sdkman/candidates/java/$DEFAULT_JAVA_VERSION" ]; then
echo "+ sdk install java $DEFAULT_JAVA_VERSION" print_and_execute sdk install java $DEFAULT_JAVA_VERSION
sdk install java $DEFAULT_JAVA_VERSION
fi fi
# 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks # 4. Install missing SDK java versions in any of the prepare_*.sh scripts for the provided forks
@ -77,8 +94,7 @@ for fork in "$@"; do
if [ -f "./prepare_$fork.sh" ]; then if [ -f "./prepare_$fork.sh" ]; then
grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do grep -h "^sdk use" "./prepare_$fork.sh" | cut -d' ' -f4 | while read -r version; do
if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then if [ ! -d "$HOME/.sdkman/candidates/java/$version" ]; then
echo "+ sdk install java $version" print_and_execute sdk install java $version
sdk install java $version
fi fi
done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early done || true # grep returns exit code 1 when no match, `|| true` prevents the script from exiting early
fi fi
@ -99,44 +115,68 @@ if [ -f "/sys/devices/system/cpu/cpufreq/boost" ]; then
fi fi
fi fi
set -o xtrace print_and_execute java --version
print_and_execute ./mvnw --quiet clean verify
java --version print_and_execute rm -f measurements.txt
print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt
./mvnw --quiet clean verify
rm -f measurements.txt
ln -s measurements_1B.txt measurements.txt
set +o xtrace
echo "" echo ""
# check if out_expected.txt exists # check if measurements_xxx.out exists
if [ ! -f "out_expected.txt" ]; then if [ ! -f "${MEASUREMENTS_FILE%.txt}.out" ]; then
echo "Error: out_expected.txt does not exist." >&2 echo -e "${BOLD_RED}ERROR${RESET}: ${MEASUREMENTS_FILE%.txt}.out does not exist." >&2
echo "Please create it with:" echo "Please create it with:"
echo " ./calculate_average_baseline.sh > out_expected.txt" echo ""
echo " ./calculate_average_baseline.sh > ${MEASUREMENTS_FILE%.txt}.out"
echo ""
exit 1 exit 1
fi fi
# Prepare commands for running benchmarks for each of the forks # Run tests and benchmark for each fork
filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run filetimestamp=$(date +"%Y%m%d%H%M%S") # same for all fork.out files from this run
failed=() failed=()
test_output=$(mktemp)
for fork in "$@"; do for fork in "$@"; do
# Use prepare script to invoke SDKMAN set +e # we don't want prepare.sh, test.sh or hyperfine failing on 1 fork to exit the script early
if [ -f "./prepare_$fork.sh" ]; then
echo "+ source ./prepare_$fork.sh" # Run the test suite
source "./prepare_$fork.sh" print_and_execute $TIMEOUT ./test.sh $fork | tee $test_output > /dev/null 2>&1
else if [ $? -ne 0 ]; then
echo "+ sdk use java $DEFAULT_JAVA_VERSION" failed+=("$fork")
sdk use java $DEFAULT_JAVA_VERSION echo ""
echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork failed"
cat $test_output
echo ""
continue
fi fi
# Use hyperfine to run the benchmarks for each fork # Run the test on $MEASUREMENTS_FILE; this serves as the warmup
HYPERFINE_OPTS="--warmup 1 --runs 5 --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out" print_and_execute $TIMEOUT ./test.sh $fork $MEASUREMENTS_FILE | tee $test_output > /dev/null 2>&1
if [ $? -ne 0 ]; then
failed+=("$fork")
echo ""
echo -e "${BOLD_RED}FAILURE${RESET}: ./test.sh $fork $MEASUREMENTS_FILE failed"
cat $test_output
echo ""
set +e # we don't want hyperfine or diff failing on 1 fork to exit the script early continue
fi
# re-link measurements.txt since test.sh deleted it
print_and_execute rm -f measurements.txt
print_and_execute ln -s $MEASUREMENTS_FILE measurements.txt
# Run prepare script
if [ -f "./prepare_$fork.sh" ]; then
print_and_execute source "./prepare_$fork.sh"
else
print_and_execute sdk use java $DEFAULT_JAVA_VERSION
fi
# Use hyperfine to run the benchmark for each fork
HYPERFINE_OPTS="--warmup 0 --runs $RUNS --export-json $fork-$filetimestamp-timing.json --output ./$fork-$filetimestamp.out"
# check if this script is running on a Linux box # check if this script is running on a Linux box
if [ "$(uname -s)" == "Linux" ]; then if [ "$(uname -s)" == "Linux" ]; then
@ -144,36 +184,20 @@ for fork in "$@"; do
# Linux platform # Linux platform
# prepend this with numactl --physcpubind=0-7 for running it only with 8 cores # prepend this with numactl --physcpubind=0-7 for running it only with 8 cores
numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "timeout -v $RUN_TIME_LIMIT ./calculate_average_$fork.sh 2>&1" numactl --physcpubind=0-7 hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
else # MacOS else # MacOS
timeout="" hyperfine $HYPERFINE_OPTS "$TIMEOUT ./calculate_average_$fork.sh 2>&1"
if [ -x "$(command -v gtimeout)" ]; then
timeout="gtimeout -v $RUN_TIME_LIMIT" # from `brew install coreutils`
else
echo -e "${BOLD_YELLOW}WARNING${RESET} gtimeout not available, benchmark runs may take indefinitely long."
fi
hyperfine $HYPERFINE_OPTS "$timeout ./calculate_average_$fork.sh 2>&1"
fi fi
# Catch hyperfine command failed # Catch hyperfine command failed
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
failed+=("$fork") failed+=("$fork")
# Hyperfine already prints the error message
echo "" echo ""
continue
fi fi
# Verify output
diff <(grep Hamburg $fork-$filetimestamp.out) <(grep Hamburg out_expected.txt) > /dev/null
if [ $? -ne 0 ]; then
echo ""
echo -e "${BOLD_RED}FAILURE${RESET}: output of ${BOLD_WHITE}$fork-$filetimestamp.out${RESET} does not match ${BOLD_WHITE}out_expected.txt${RESET}"
echo ""
git diff --no-index --word-diff out_expected.txt $fork-$filetimestamp.out
# add $fork to $failed array
failed+=("$fork")
fi
set -e
done done
set -e
rm $test_output
# Summary # Summary
echo -e "${BOLD_WHITE}Summary${RESET}" echo -e "${BOLD_WHITE}Summary${RESET}"
@ -285,9 +309,12 @@ rm $leaderboard_temp_file
# Finalize .out files # Finalize .out files
echo "Raw results saved to file(s):" echo "Raw results saved to file(s):"
for fork in "$@"; do for fork in "$@"; do
# Append $fork-$filetimestamp-timing.json to $fork-$filetimestamp.out and rm $fork-$filetimestamp-timing.json if [ -f "$fork-$filetimestamp-timing.json" ]; then
cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out cat $fork-$filetimestamp-timing.json >> $fork-$filetimestamp.out
rm $fork-$filetimestamp-timing.json rm $fork-$filetimestamp-timing.json
fi
echo " $fork-$filetimestamp.out" if [ -f "$fork-$filetimestamp.out" ]; then
echo " $fork-$filetimestamp.out"
fi
done done

View File

@ -45,7 +45,7 @@ for sample in $(ls $INPUT); do
rm -f measurements.txt rm -f measurements.txt
ln -s $sample measurements.txt ln -s $sample measurements.txt
diff <("./calculate_average_$FORK.sh") ${sample%.txt}.out diff <("./calculate_average_$FORK.sh" | ./tocsv.sh) <(./tocsv.sh < ${sample%.txt}.out)
done done
rm measurements.txt rm measurements.txt